Changes of Revision 2 (glibc, project Mega:24.03)
_service:tar_scm:glibc.spec
Changed
@@ -48,10 +48,14 @@ %undefine with_valgrind %endif +%ifarch loongarch64 +%global ENABLE_RELOC 0 +%else %global ENABLE_RELOC 1 +%endif # Only some architectures have static PIE support -%define pie_arches %{ix86} x86_64 aarch64 +%define pie_arches %{ix86} x86_64 aarch64 loongarch64 %define enablekernel 3.2 %define target %{_target_cpu}-%{_vendor}-linux @@ -67,7 +71,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 22 +Release: 23 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -141,6 +145,36 @@ Patch53: sparc-Remove-unwind-information-from-signal-return-s.patch Patch54: arm-Remove-wrong-ldr-from-_dl_start_user-BZ-31339.patch Patch55: malloc-Use-__get_nprocs-on-arena_get2-BZ-30945.patch +Patch56: LoongArch-Redefine-macro-LEAF-ENTRY.patch +Patch57: LoongArch-Add-minuimum-binutils-required-version.patch +Patch58: Loongarch-Add-ifunc-support-and-add-different-versio.patch +Patch59: elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch +Patch60: LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch +Patch61: Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch +Patch62: Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch +Patch63: LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch +Patch64: LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch +Patch65: LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch +Patch66: LoongArch-Remove-support-code-for-old-linker-in-star.patch +Patch67: LoongArch-Micro-optimize-LD_PCREL.patch +Patch68: LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch +Patch69: LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch +Patch70: LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch +Patch71: LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch +Patch72: LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch +Patch73: LoongArch-Change-loongarch-to-LoongArch-in-comments.patch +Patch74: LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch +Patch75: LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch +Patch76: LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch +Patch77: LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch +Patch78: LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch +Patch79: LoongArch-Add-glibc.cpu.hwcap-support.patch +Patch80: Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch +Patch81: LoongArch-Unify-Register-Names.patch +Patch82: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch +Patch83: linux-Sync-Linux-6.6-elf.h.patch +Patch84: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch + Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch @@ -759,7 +793,9 @@ touch nscd.filelist touch nss_modules.filelist touch nss-devel.filelist +%ifnarch loongarch64 touch libnsl.filelist +%endif touch debugutils.filelist touch benchtests.filelist touch help.filelist @@ -818,7 +854,9 @@ -e '%{_prefix}/share' \ -e '/var/db/Makefile' \ -e '/libnss_.*\.so0-9.*$' \ +%ifnarch loongarch64 -e '/libnsl' \ +%endif -e 'glibc-benchtests' \ -e 'aux-cache' \ > glibc.filelist @@ -891,8 +929,10 @@ ############################################################################## # libnsl subpackage ############################################################################## +%ifnarch loongarch64 grep -E '/libnsl\.so\.0-9+$' master.filelist > libnsl.filelist test $(wc 
-l < libnsl.filelist) -eq 1 +%endif ############################################################################## # glibc debugutils sub-package @@ -1331,8 +1371,10 @@ %files -f nss-devel.filelist nss-devel +%ifnarch loongarch64 %files -f libnsl.filelist -n libnsl /%{_lib}/libnsl.so.1 +%endif %files -f debugutils.filelist debugutils @@ -1354,6 +1396,10 @@ %endif %changelog +* Thu Feb 29 2024 Peng Fan <fanpeng@loongson.cn> - 2.38-23 +- LoongArch: sync patch from glibc upstream +- Reduced kernel version requirements + * Fri Feb 23 Jingxiao Lu <lujingxiao@huawei.com> - 2.38-22 - malloc: Use __get_nprocs on arena_get2 (BZ 30945)
_service:tar_scm:Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
Added
@@ -0,0 +1,40 @@ +From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001 +From: ticat_fp <fanpeng@loongson.cn> +Date: Thu, 29 Feb 2024 15:58:31 +0800 +Subject: PATCH Decrease value of arch_minimum_kernel with LoongArch + +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/configure | 2 +- + sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure +index 0d1159e9..851b2285 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure ++++ b/sysdeps/unix/sysv/linux/loongarch/configure +@@ -1,7 +1,7 @@ + # This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.19.0 + + libc_cv_loongarch_int_abi=no + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac +index 04e9150a..00815c2f 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac ++++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac +@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage + GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.19.0 + + libc_cv_loongarch_int_abi=no + AC_EGREP_CPP(4 8 8, __SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__ +-- +2.33.0 +
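This patch lowers arch_minimum_kernel for the LoongArch port from 5.19.0 to 4.19.0, so the built glibc accepts kernels as old as 4.19. glibc enforces that limit in the loader at startup and refuses to run ("FATAL: kernel too old") on anything older. As a rough, hypothetical user-space analogue of that version comparison (this is an illustration, not glibc code), the check amounts to:

/* Sketch: compare the running kernel version against the 4.19.0 minimum
   set by the patch above.  glibc performs the real check inside the
   dynamic loader; this standalone program only illustrates the idea.  */
#include <stdio.h>
#include <stdlib.h>
#include <sys/utsname.h>

int
main (void)
{
  struct utsname u;
  if (uname (&u) != 0)
    return EXIT_FAILURE;

  int maj = 0, min = 0, patch = 0;
  sscanf (u.release, "%d.%d.%d", &maj, &min, &patch);

  long running  = (maj * 1000L + min) * 1000L + patch;
  long required = (4 * 1000L + 19) * 1000L + 0;   /* 4.19.0 */

  if (running < required)
    {
      fprintf (stderr, "FATAL: kernel too old (%s < 4.19.0)\n", u.release);
      return EXIT_FAILURE;
    }
  printf ("kernel %s satisfies the 4.19.0 minimum\n", u.release);
  return EXIT_SUCCESS;
}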
_service:tar_scm:LoongArch-Add-glibc.cpu.hwcap-support.patch
Added
@@ -0,0 +1,499 @@ +From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 15 Sep 2023 17:35:19 +0800 +Subject: PATCH 25/29 LoongArch: Add glibc.cpu.hwcap support. + +Key Points: +1. On lasx & lsx platforms, We must use _dl_runtime_{profile, resolve}_{lsx, lasx} + to save vector registers. +2. Via "tunables", users can choose str/mem_{lasx,lsx,unaligned} functions with + `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`. + Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_{profile, resolve}_{lsx, lasx} + selection. + +Usage Notes: +1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces. +2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL. + Unmentioned features turn off. With default ifunc: lasx > lsx > unaligned > + aligned > generic, effect is: lasx > unaligned > aligned > generic; lsx off. +3. Incorrect GLIBC_TUNABLES settings will show error messages. + For example: On lsx platforms, you cannot enable lasx features. If you do + that, you will get error messages. +4. Valid input examples: + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic. + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic. + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions + allowed but not recommended. Results in: lasx > lsx > unaligned > aligned > + generic. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/Makefile | 4 + + sysdeps/loongarch/Versions | 5 ++ + sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++ + sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++ + sysdeps/loongarch/dl-machine.h | 27 +++++- + sysdeps/loongarch/dl-tunables.list | 25 ++++++ + .../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++ + .../unix/sysv/linux/loongarch/cpu-features.h | 18 +++- + .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++ + sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++ + .../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++ + 11 files changed, 329 insertions(+), 8 deletions(-) + create mode 100644 sysdeps/loongarch/Versions + create mode 100644 sysdeps/loongarch/cpu-tunables.c + create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c + create mode 100644 sysdeps/loongarch/dl-tunables.list + create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c + +diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile +index 43d2f583..30a1f4a8 100644 +--- a/sysdeps/loongarch/Makefile ++++ b/sysdeps/loongarch/Makefile +@@ -6,6 +6,10 @@ ifeq ($(subdir),elf) + gen-as-const-headers += dl-link.sym + endif + ++ifeq ($(subdir),elf) ++ sysdep-dl-routines += dl-get-cpu-features ++endif ++ + # LoongArch's assembler also needs to know about PIC as it changes the + # definition of some assembler macros. 
+ ASFLAGS-.os += $(pic-ccflag) +diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions +new file mode 100644 +index 00000000..33ae2cc0 +--- /dev/null ++++ b/sysdeps/loongarch/Versions +@@ -0,0 +1,5 @@ ++ld { ++ GLIBC_PRIVATE { ++ _dl_larch_get_cpu_features; ++ } ++} +diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c +new file mode 100644 +index 00000000..8e9fab93 +--- /dev/null ++++ b/sysdeps/loongarch/cpu-tunables.c +@@ -0,0 +1,89 @@ ++/* LoongArch CPU feature tuning. ++ This file is part of the GNU C Library. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++# include <stdbool.h> ++# include <stdint.h> ++# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */ ++# include <elf/dl-tunables.h> ++# include <string.h> ++# include <cpu-features.h> ++# include <ldsodefs.h> ++# include <sys/auxv.h> ++ ++# define HWCAP_LOONGARCH_IFUNC \ ++ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX) ++ ++# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \ ++ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ ++ if (!memcmp (f, #name, len) && \ ++ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \ ++ { \ ++ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \ ++ break; \ ++ } \ ++ ++attribute_hidden ++void ++TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) ++{ ++ const char *p = valp->strval; ++ size_t len; ++ unsigned long hwcap = 0; ++ const char *c; ++ ++ do { ++ for (c = p; *c != ','; c++) ++ if (*c == '\0') ++ break; ++ ++ len = c - p; ++ ++ switch(len) ++ { ++ default: ++ _dl_fatal_printf ( ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ break; ++ case 3: ++ { ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3); ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3); ++ _dl_fatal_printf ( ++ "Some features are invalid or not supported on this machine!!\n" ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ } ++ break; ++ case 4: ++ { ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4); ++ _dl_fatal_printf ( ++ "Some features are invalid or not supported on this machine!!\n" ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ } ++ break; ++ } ++ ++ p += len + 1; ++ } ++ while (*c != '\0'); ++ ++ GLRO (dl_larch_cpu_features).hwcap &= hwcap; ++} +diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c +new file mode 100644 +index 00000000..7cd9bc15 +--- /dev/null ++++ b/sysdeps/loongarch/dl-get-cpu-features.c +@@ -0,0 +1,25 @@ ++/* Define _dl_larch_get_cpu_features. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++ ++#include <ldsodefs.h> ++ ++const struct cpu_features * ++_dl_larch_get_cpu_features (void) ++{ ++ return &GLRO(dl_larch_cpu_features); ++} +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 57913cef..b395a928 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -29,6 +29,8 @@ + #include <dl-static-tls.h> + #include <dl-machine-rel.h> + ++#include <cpu-features.c> ++ + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ + ".globl\t" __STRING (entry) "\n\t" \ +@@ -53,6 +55,23 @@ + #define ELF_MACHINE_NO_REL 1 + #define ELF_MACHINE_NO_RELA 0 + ++#define DL_PLATFORM_INIT dl_platform_init () ++ ++static inline void __attribute__ ((unused)) ++dl_platform_init (void) ++{ ++ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') ++ /* Avoid an empty string which would disturb us. */ ++ GLRO(dl_platform) = NULL; ++ ++#ifdef SHARED ++ /* init_cpu_features has been called early from __libc_start_main in ++ static executable. */ ++ init_cpu_features (&GLRO(dl_larch_cpu_features)); ++#endif ++} ++ ++ + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int + elf_machine_matches_host (const ElfW (Ehdr) *ehdr) +@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + if (profile != 0) + { + #if !defined __loongarch_soft_float +- if (SUPPORT_LASX) ++ if (RTLD_SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx; +- else if (SUPPORT_LSX) ++ else if (RTLD_SUPPORT_LSX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx; + else + #endif +@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + indicated by the offset on the stack, and then jump to + the resolved address. */ + #if !defined __loongarch_soft_float +- if (SUPPORT_LASX) ++ if (RTLD_SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx; +- else if (SUPPORT_LSX) ++ else if (RTLD_SUPPORT_LSX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lsx; + else + #endif +diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list +new file mode 100644 +index 00000000..66b34275 +--- /dev/null ++++ b/sysdeps/loongarch/dl-tunables.list +@@ -0,0 +1,25 @@ ++# LoongArch specific tunables. ++# Copyright (C) 2023 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. 
++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# <http://www.gnu.org/licenses/>. ++ ++glibc { ++ cpu { ++ hwcaps { ++ type: STRING ++ } ++ } ++} +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c +new file mode 100644 +index 00000000..1290c4ce +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c +@@ -0,0 +1,29 @@ ++/* Initialize CPU feature data. LoongArch64 version. ++ This file is part of the GNU C Library. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include <cpu-features.h> ++#include <elf/dl-hwcaps.h> ++#include <elf/dl-tunables.h> ++extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; ++ ++static inline void ++init_cpu_features (struct cpu_features *cpu_features) ++{ ++ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap); ++ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); ++} +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index d1a280a5..450963ce 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -19,13 +19,23 @@ + #ifndef _CPU_FEATURES_LOONGARCH64_H + #define _CPU_FEATURES_LOONGARCH64_H + ++#include <stdint.h> + #include <sys/auxv.h> + +-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL) +-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) +-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) ++struct cpu_features ++ { ++ uint64_t hwcap; ++ }; + ++/* Get a pointer to the CPU features structure. */ ++extern const struct cpu_features *_dl_larch_get_cpu_features (void) ++ __attribute__ ((pure)); ++ ++#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL) ++#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX) ++#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX) ++#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) ++#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + #define INIT_ARCH() + + #endif /* _CPU_FEATURES_LOONGARCH64_H */ +- +diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c +new file mode 100644 +index 00000000..6217fda9 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c +@@ -0,0 +1,60 @@ ++/* Data for LoongArch64 version of processor capability information. 
++ Linux version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* If anything should be added here check whether the size of each string ++ is still ok with the given array size. ++ ++ All the #ifdefs in the definitions are quite irritating but ++ necessary if we want to avoid duplicating the information. There ++ are three different modes: ++ ++ - PROCINFO_DECL is defined. This means we are only interested in ++ declarations. ++ ++ - PROCINFO_DECL is not defined: ++ ++ + if SHARED is defined the file is included in an array ++ initializer. The .element = { ... } syntax is needed. ++ ++ + if SHARED is not defined a normal array initialization is ++ needed. ++ */ ++ ++#ifndef PROCINFO_CLASS ++# define PROCINFO_CLASS ++#endif ++ ++#if !IS_IN (ldconfig) ++# if !defined PROCINFO_DECL && defined SHARED ++ ._dl_larch_cpu_features ++# else ++PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features ++# endif ++# ifndef PROCINFO_DECL ++= { } ++# endif ++# if !defined SHARED || defined PROCINFO_DECL ++; ++# else ++, ++# endif ++#endif ++ ++#undef PROCINFO_DECL ++#undef PROCINFO_CLASS +diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c +new file mode 100644 +index 00000000..455fd71a +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c +@@ -0,0 +1,21 @@ ++/* Operating system support for run-time dynamic linker. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include <config.h> ++#include <sysdeps/loongarch/cpu-tunables.c> ++#include <sysdeps/unix/sysv/linux/dl-sysdep.c> +diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c +new file mode 100644 +index 00000000..f1346ece +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c +@@ -0,0 +1,34 @@ ++/* Override csu/libc-start.c on LoongArch64. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef SHARED ++ ++/* Mark symbols hidden in static PIE for early self relocation to work. */ ++# if BUILD_PIE_DEFAULT ++# pragma GCC visibility push(hidden) ++# endif ++ ++# include <ldsodefs.h> ++# include <cpu-features.c> ++ ++extern struct cpu_features _dl_larch_cpu_features; ++ ++# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features) ++ ++#endif ++#include <csu/libc-start.c> +-- +2.33.0 +
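The patch above ties the string/memory ifunc selection to the HWCAP bits reported by the kernel (UAL, LSX, LASX) and lets glibc.cpu.hwcaps mask them; per the commit message, GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL keeps LSX and unaligned access but drops LASX. A small sketch for inspecting those bits from user space is shown below; it assumes the HWCAP_LOONGARCH_* macros are visible via <sys/auxv.h> on a LoongArch toolchain (the same names the patch uses), and the #ifdef guards keep it compilable on other targets:

/* Sketch: report the AT_HWCAP bits that the ifunc selectors above key on.
   HWCAP_LOONGARCH_* come from the LoongArch kernel/glibc headers; on other
   architectures the #ifdef blocks simply compile away.  */
#include <stdio.h>
#include <sys/auxv.h>

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

#ifdef HWCAP_LOONGARCH_UAL
  printf ("UAL  (unaligned access): %s\n",
          (hwcap & HWCAP_LOONGARCH_UAL) ? "yes" : "no");
#endif
#ifdef HWCAP_LOONGARCH_LSX
  printf ("LSX  (128-bit SIMD):     %s\n",
          (hwcap & HWCAP_LOONGARCH_LSX) ? "yes" : "no");
#endif
#ifdef HWCAP_LOONGARCH_LASX
  printf ("LASX (256-bit SIMD):     %s\n",
          (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
#endif
  (void) hwcap;
  return 0;
}

Note that, as the commit message states, the tunable can only switch features off relative to what the hardware reports; it cannot enable LASX on an LSX-only machine.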
_service:tar_scm:LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
Added
@@ -0,0 +1,485 @@ +From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:36 +0800 +Subject: PATCH 15/29 LoongArch: Add ifunc support for memchr{aligned, lsx, + lasx} + +According to glibc memchr microbenchmark, this implementation could reduce +the runtime as following: + +Name Percent of runtime reduced +memchr-lasx 37%-83% +memchr-lsx 30%-66% +memchr-aligned 0%-15% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 7 ++ + .../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++ + .../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++ + .../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++ + 7 files changed, 401 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 64416b02..2f4802cf 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -24,5 +24,8 @@ sysdep_routines += \ + rawmemchr-aligned \ + rawmemchr-lsx \ + rawmemchr-lasx \ ++ memchr-aligned \ ++ memchr-lsx \ ++ memchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 3db9af14..a567b9cf 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) + ) + ++ IFUNC_IMPL (i, name, memchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +new file mode 100644 +index 00000000..9060ccd5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +new file mode 100644 +index 00000000..81d0d004 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized memchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCHR_NAME __memchr_aligned ++#else ++# define MEMCHR_NAME memchr ++#endif ++ ++LEAF(MEMCHR_NAME, 6) ++ beqz a2, L(out) ++ andi t1, a0, 0x7 ++ add.d a5, a0, a2 ++ bstrins.d a0, zero, 2, 0 ++ ++ ld.d t0, a0, 0 ++ bstrins.d a1, a1, 15, 8 ++ lu12i.w a3, 0x01010 ++ slli.d t2, t1, 03 ++ ++ bstrins.d a1, a1, 31, 16 ++ ori a3, a3, 0x101 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a3, a3, 63, 32 ++ sll.d t2, t7, t2 ++ xor t0, t0, a1 ++ ++ ++ addi.d a6, a5, -1 ++ slli.d a4, a3, 7 ++ sub.d t1, t8, t1 ++ orn t0, t0, t2 ++ ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ bstrins.d a6, zero, 2, 0 ++ and t0, t2, t3 ++ ++ bgeu t1, a2, L(end) ++L(loop): ++ bnez t0, L(found) ++ ld.d t1, a0, 8 ++ xor t0, t1, a1 ++ ++ addi.d a0, a0, 8 ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ and t0, t2, t3 ++ ++ ++ bne a0, a6, L(loop) ++L(end): ++ sub.d t1, a5, a6 ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ ++ sltu t1, t0, t1 ++ add.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++L(found): ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++END(MEMCHR_NAME) ++ ++libc_hidden_builtin_def (MEMCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +new file mode 100644 +index 00000000..a26cdf48 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +@@ -0,0 +1,117 @@ ++/* Optimized memchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lasx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x3f ++ bstrins.d a0, zero, 5, 0 ++ ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ li.d t1, -1 ++ li.d t2, 64 ++ ++ xvreplgr2vr.b xr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ ++ ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ movfr2gr.d t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 5, 0 ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ beq a0, a4, L(out) ++ ++ ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(found): ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.d t1, t1, t2 ++ and t0, t0, t1 ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +new file mode 100644 +index 00000000..a73ecd25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +@@ -0,0 +1,102 @@ ++/* Optimized memchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lsx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ li.d t1, -1 ++ li.d t2, 32 ++ ++ vreplgr2vr.b vr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 4, 0 ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ beq a0, a4, L(out) ++ ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(found): ++ ctz.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.w t1, t1, t2 ++ and t0, t0, t1 ++ ctz.w t1, t0 ++ ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c +new file mode 100644 +index 00000000..059479c0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memchr __redirect_memchr ++# include <string.h> ++# undef memchr ++ ++# define SYMBOL_NAME memchr ++# include "ifunc-memchr.h" ++ ++libc_ifunc_redirected (__redirect_memchr, memchr, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); ++# endif ++ ++#endif +-- +2.33.0 +
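The quoted 0%-83% runtime reductions come from glibc's memchr microbenchmark. For a quick spot check outside the glibc build tree, a rough standalone timing loop such as the one below can be used; it is not the benchtests harness, and the buffer size, iteration count, and match position are arbitrary choices:

/* Rough standalone timing sketch for memchr.  Not glibc's benchtests
   harness; it only illustrates the kind of measurement the quoted
   numbers are based on.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int
main (void)
{
  enum { SIZE = 1 << 20, ITERS = 2000 };
  char *buf = malloc (SIZE);
  if (buf == NULL)
    return 1;
  memset (buf, 'a', SIZE);
  buf[SIZE - 1] = 'z';          /* place the match at the very end */

  struct timespec t0, t1;
  clock_gettime (CLOCK_MONOTONIC, &t0);

  void *found = NULL;
  for (int i = 0; i < ITERS; i++)
    found = memchr (buf, 'z', SIZE);

  clock_gettime (CLOCK_MONOTONIC, &t1);
  double ns = (t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec);

  printf ("memchr found %p, %.1f ns per call\n", found, ns / ITERS);
  free (buf);
  return 0;
}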
_service:tar_scm:LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
Added
@@ -0,0 +1,946 @@ +From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:39 +0800 +Subject: PATCH 18/29 LoongArch: Add ifunc support for memcmp{aligned, lsx, + lasx} + +According to glibc memcmp microbenchmark test results(Add generic +memcmp), this implementation have performance improvement +except the length is less than 3, details as below: + +Name Percent of time reduced +memcmp-lasx 16%-74% +memcmp-lsx 20%-50% +memcmp-aligned 5%-20% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++ + .../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++ + sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++ + 7 files changed, 861 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 216886c5..360a6718 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -34,5 +34,8 @@ sysdep_routines += \ + memset-unaligned \ + memset-lsx \ + memset-lasx \ ++ memcmp-aligned \ ++ memcmp-lsx \ ++ memcmp-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 37f60dde..e397d58c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) + ) + ++ IFUNC_IMPL (i, name, memcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx) ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +new file mode 100644 +index 00000000..04adc2e5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +@@ -0,0 +1,40 @@ ++/* Common definition for memcmp ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +new file mode 100644 +index 00000000..14a7caa9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +@@ -0,0 +1,292 @@ ++/* Optimized memcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCMP_NAME __memcmp_aligned ++#else ++# define MEMCMP_NAME memcmp ++#endif ++ ++LEAF(MEMCMP_NAME, 6) ++ beqz a2, L(ret) ++ andi a4, a1, 0x7 ++ andi a3, a0, 0x7 ++ sltu a5, a4, a3 ++ ++ xor t0, a0, a1 ++ li.w t8, 8 ++ maskeqz t0, t0, a5 ++ li.w t7, -1 ++ ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ ++ xor a0, a0, a3 ++ xor a1, a1, a4 ++ ld.d t2, a0, 0 ++ ld.d t1, a1, 0 ++ ++ slli.d t3, a3, 3 ++ slli.d t4, a4, 3 ++ sub.d a6, t3, t4 ++ srl.d t1, t1, t4 ++ ++ srl.d t0, t2, t3 ++ srl.d t5, t7, t4 ++ sub.d t6, t0, t1 ++ and t6, t6, t5 ++ ++ sub.d t5, t8, a4 ++ bnez t6, L(first_out) ++ bgeu t5, a2, L(ret) ++ sub.d a2, a2, t5 ++ ++ bnez a6, L(unaligned) ++ blt a2, t8, L(al_less_8bytes) ++ andi t1, a2, 31 ++ beq t1, a2, L(al_less_32bytes) ++ ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ move a2, t1 ++ ++L(al_loop): ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ld.d t4, a0, 24 ++ ++ ld.d t5, a1, 24 ++ ld.d t6, a0, 32 ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ ++ addi.d a1, a1, 32 ++ bne t0, t1, L(out1) ++ bne t2, t3, L(out2) ++ bne t4, t5, L(out3) ++ ++ bne t6, t7, L(out4) ++ bne a0, a4, L(al_loop) ++ ++L(al_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(al_less_16bytes) ++ ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ bne t0, t1, L(out1) ++ ++ bne t2, t3, L(out2) ++ ++L(al_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(al_less_8bytes) ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ bne t0, t1, L(out1) ++ ++L(al_less_8bytes): ++ beqz a2, L(ret) ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ slli.d t2, a2, 3 ++ sll.d t2, t7, t2 ++ sub.d t3, t0, t1 ++ ++ andn t6, t3, t2 ++ bnez t6, L(count_diff) ++ ++L(ret): ++ move a0, zero ++ jr ra ++ ++L(out4): ++ move t0, t6 ++ move t1, t7 ++ sub.d t6, t6, t7 ++ b L(count_diff) ++ ++L(out3): ++ move t0, t4 ++ move t1, t5 ++ sub.d t6, t4, t5 ++ b L(count_diff) ++ ++L(out2): ++ move t0, t2 ++ move t1, t3 ++L(out1): ++ sub.d t6, t0, t1 ++ b L(count_diff) ++ ++L(first_out): ++ slli.d t4, a2, 3 ++ slt t3, a2, t5 ++ sll.d t4, t7, t4 ++ maskeqz t4, t4, t3 ++ ++ andn t6, t6, t4 ++ ++L(count_diff): ++ ctz.d t2, t6 ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ sub.d t3, t1, t0 ++ masknez t2, t2, a5 ++ maskeqz t3, t3, a5 ++ or a0, t2, t3 ++ ++ jr ra ++ ++L(unaligned): ++ sub.d a7, zero, a6 ++ srl.d t0, t2, a6 ++ blt a2, t8, L(un_less_8bytes) ++ ++ andi t1, a2, 31 ++ beq t1, a2, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ ++ move a2, t1 ++ ++L(un_loop): ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t4, a0, 16 ++ ++ ld.d t3, a1, 16 ++ ld.d t6, a0, 24 ++ ld.d t5, a1, 24 ++ ld.d t8, a0, 32 ++ ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ sll.d a3, t2, a7 ++ ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ srl.d t0, t4, a6 ++ sll.d a3, t6, a7 ++ ++ or t4, a3, t0 ++ bne t4, t5, L(out3) ++ srl.d t0, t6, a6 ++ sll.d a3, t8, a7 ++ ++ or t6, t0, a3 ++ bne t6, t7, L(out4) ++ srl.d t0, t8, a6 ++ bne a0, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(un_less_16bytes) ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ++ ld.d t4, a0, 
16 ++ ld.d t3, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ addi.d a2, a2, -16 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ ++ srl.d t0, t4, a6 ++ ++L(un_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(un_less_8bytes) ++ ld.d t2, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ ++L(un_less_8bytes): ++ beqz a2, L(ret) ++ andi a7, a7, 63 ++ slli.d a4, a2, 3 ++ bgeu a7, a4, L(last_cmp) ++ ++ ld.d t2, a0, 8 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ ++L(last_cmp): ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ sll.d t2, t7, a4 ++ sub.d t3, t0, t1 ++ andn t6, t3, t2 ++ ++ bnez t6, L(count_diff) ++ move a0, zero ++ jr ra ++END(MEMCMP_NAME) ++ ++libc_hidden_builtin_def (MEMCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +new file mode 100644 +index 00000000..3151a179 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +@@ -0,0 +1,207 @@ ++/* Optimized memcmp implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCMP __memcmp_lasx ++ ++LEAF(MEMCMP, 6) ++ li.d t2, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t2, a2, L(less32) ++ ++ li.d t1, 160 ++ bgeu a2, t1, L(make_aligned) ++L(loop32): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ xvseq.b xr2, xr0, xr1 ++ ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++L(last_bytes): ++ bltu t2, a2, L(loop32) ++ xvld xr0, a3, -32 ++ ++ ++ xvld xr1, a4, -32 ++ xvseq.b xr2, xr0, xr1 ++L(end): ++ xvmsknz.b xr2, xr2 ++ xvpermi.q xr4, xr0, 1 ++ ++ xvpickve.w xr3, xr2, 4 ++ xvpermi.q xr5, xr1, 1 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr2, t0 ++ vshuf.b vr0, vr4, vr0, vr2 ++ vshuf.b vr1, vr5, vr1, vr2 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(less32): ++ srli.d t0, a2, 4 ++ beqz t0, L(less16) ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ ++ vld vr2, a3, -16 ++ vld vr3, a4, -16 ++L(short_ret): ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr5, vr2, vr3 ++ ++ vmsknz.b vr4, vr4 ++ vmsknz.b vr5, vr5 ++ vilvl.h vr4, vr5, vr4 ++ movfr2gr.s t0, fa4 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr4, t0 ++ vshuf.b vr0, vr2, vr0, vr4 ++ vshuf.b vr1, vr3, vr1, vr4 ++ ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(less16): ++ srli.d t0, a2, 3 ++ beqz t0, L(less8) ++ vldrepl.d vr0, a0, 0 ++ vldrepl.d vr1, a1, 0 ++ ++ vldrepl.d vr2, a3, -8 ++ vldrepl.d vr3, a4, -8 ++ b L(short_ret) ++ nop ++ ++L(less8): ++ srli.d t0, a2, 2 ++ beqz t0, L(less4) ++ vldrepl.w vr0, a0, 0 ++ vldrepl.w vr1, a1, 0 ++ ++ ++ vldrepl.w vr2, a3, -4 ++ vldrepl.w vr3, a4, -4 ++ b L(short_ret) ++ nop ++ ++L(less4): ++ srli.d t0, a2, 1 ++ beqz t0, L(less2) ++ vldrepl.h vr0, a0, 0 ++ vldrepl.h vr1, a1, 0 ++ ++ vldrepl.h vr2, a3, -2 ++ vldrepl.h vr3, a4, -2 ++ b L(short_ret) ++ nop ++ ++L(less2): ++ beqz a2, L(ret0) ++ ld.bu t0, a0, 0 ++ ld.bu t1, a1, 0 ++ sub.d a0, t0, t1 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(make_aligned): ++ xvld xr0, a0, 0 ++ ++ xvld xr1, a1, 0 ++ xvseq.b xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++ ++ andi t0, a0, 0x1f ++ sub.d t0, t2, t0 ++ sub.d t1, a2, t0 ++ add.d a0, a0, t0 ++ ++ add.d a1, a1, t0 ++ andi a2, t1, 0x3f ++ sub.d t0, t1, a2 ++ add.d a5, a0, t0 ++ ++ ++L(loop_align): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ xvld xr2, a0, 32 ++ xvld xr3, a1, 32 ++ ++ xvseq.b xr0, xr0, xr1 ++ xvseq.b xr1, xr2, xr3 ++ xvmin.bu xr2, xr1, xr0 ++ xvsetanyeqz.b fcc0, xr2 ++ ++ bcnez fcc0, L(pair_end) ++ addi.d a0, a0, 64 ++ addi.d a1, a1, 64 ++ bne a0, a5, L(loop_align) ++ ++ bnez a2, L(last_bytes) ++ move a0, zero ++ jr ra ++ nop ++ ++ ++L(pair_end): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ cto.d t0, t0 ++ ldx.bu t1, a0, t0 ++ ldx.bu t2, a1, t0 ++ sub.d a0, t1, t2 ++ ++ jr ra ++END(MEMCMP) ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +new file mode 100644 +index 00000000..38a50a4c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +@@ -0,0 +1,269 @@ ++/* Optimized memcmp implementation using LoongArch LSX instructions. 
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define MEMCMP __memcmp_lsx ++ ++LEAF(MEMCMP, 6) ++ beqz a2, L(out) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr5, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ xor a1, a1, a4 ++ ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ vreplgr2vr.b vr3, a3 ++ ++ sub.d t1, t0, a3 ++ vadd.b vr3, vr3, vr5 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ ++ vseq.b vr4, vr0, vr1 ++ bgeu t1, a2, L(al_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ ++ sub.d t1, a2, t1 ++ andi a2, t1, 31 ++ beq a2, t1, L(al_less_32bytes) ++ sub.d t2, t1, a2 ++ ++ add.d a4, a0, t2 ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vld vr2, a0, 32 ++ ++ vld vr3, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ vseq.b vr4, vr0, vr1 ++ ++ ++ vseq.b vr6, vr2, vr3 ++ vand.v vr6, vr4, vr6 ++ vsetanyeqz.b fcc0, vr6 ++ bcnez fcc0, L(al_pair_end) ++ ++ bne a0, a4, L(al_loop) ++L(al_less_32bytes): ++ bgeu t0, a2, L(al_less_16bytes) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ ++ vld vr2, a0, 32 ++ vld vr3, a1, 32 ++ addi.d a2, a2, -16 ++ vreplgr2vr.b vr6, a2 ++ ++ vslt.b vr5, vr5, vr6 ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr6, vr2, vr3 ++ vorn.v vr6, vr6, vr5 ++ ++ ++L(al_pair_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ vnori.b vr4, vr6, 0 ++ vfrstpi.b vr4, vr4, 0 ++ ++ vshuf.b vr0, vr2, vr2, vr4 ++ vshuf.b vr1, vr3, vr3, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ nop ++ ++L(al_less_16bytes): ++ beqz a2, L(out) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vseq.b vr4, vr0, vr1 ++ ++ ++L(al_end): ++ vreplgr2vr.b vr6, a2 ++ vslt.b vr5, vr5, vr6 ++ vorn.v vr4, vr4, vr5 ++ nop ++ ++L(al_found): ++ vnori.b vr4, vr4, 0 ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++ ++L(unaligned): ++ xor t2, a0, a1 ++ sltu a5, a3, a4 ++ masknez t2, t2, a5 ++ xor a0, a0, t2 ++ ++ xor a1, a1, t2 ++ andi a3, a0, 0xf ++ andi a4, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ xor a1, a1, a4 ++ vld vr4, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ ++ vreplgr2vr.b vr2, a4 ++ sub.d a6, a4, a3 ++ sub.d t1, t0, a4 ++ sub.d t2, t0, a6 ++ ++ ++ vadd.b vr2, vr2, vr5 ++ vreplgr2vr.b vr6, t2 ++ vadd.b vr6, vr6, vr5 ++ vshuf.b vr0, vr4, vr4, vr6 ++ ++ vshuf.b vr1, vr2, vr1, vr2 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vseq.b vr7, vr0, vr1 ++ bgeu t1, a2, L(un_end) ++ ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, 
L(un_found) ++ sub.d a2, a2, t1 ++ andi t1, a2, 31 ++ ++ beq a2, t1, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ move a2, t1 ++ add.d a4, a1, t2 ++ ++ ++L(un_loop): ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ vld vr3, a1, 32 ++ addi.d a1, a1, 32 ++ ++ addi.d a0, a0, 32 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vld vr4, a0, 0 ++ vseq.b vr7, vr0, vr1 ++ ++ vshuf.b vr2, vr4, vr2, vr6 ++ vseq.b vr8, vr2, vr3 ++ vand.v vr8, vr7, vr8 ++ vsetanyeqz.b fcc0, vr8 ++ ++ bcnez fcc0, L(un_pair_end) ++ bne a1, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ bltu a2, t0, L(un_less_16bytes) ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vor.v vr4, vr2, vr2 ++ ++ vseq.b vr7, vr0, vr1 ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++L(un_less_16bytes): ++ beqz a2, L(out) ++ vld vr1, a1, 16 ++ bgeu a6, a2, 1f ++ ++ vld vr2, a0, 16 ++1: ++ vshuf.b vr0, vr2, vr4, vr6 ++ vseq.b vr7, vr0, vr1 ++L(un_end): ++ vreplgr2vr.b vr3, a2 ++ ++ ++ vslt.b vr3, vr5, vr3 ++ vorn.v vr7, vr7, vr3 ++ ++L(un_found): ++ vnori.b vr7, vr7, 0 ++ vfrstpi.b vr7, vr7, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr7 ++ vshuf.b vr1, vr1, vr1, vr7 ++L(calc_result): ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t3, a5 ++ maskeqz t1, t2, a5 ++ ++ or a0, t0, t1 ++ jr ra ++L(un_pair_end): ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ ++ ++ vnori.b vr7, vr8, 0 ++ vfrstpi.b vr7, vr7, 0 ++ vshuf.b vr0, vr2, vr2, vr7 ++ vshuf.b vr1, vr3, vr3, vr7 ++ ++ b L(calc_result) ++END(MEMCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c +new file mode 100644 +index 00000000..32eccac2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c +@@ -0,0 +1,43 @@ ++/* Multiple versions of memcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memcmp __redirect_memcmp ++# include <string.h> ++# undef memcmp ++ ++# define SYMBOL_NAME memcmp ++# include "ifunc-memcmp.h" ++ ++libc_ifunc_redirected (__redirect_memcmp, memcmp, ++ IFUNC_SELECTOR ()); ++# undef bcmp ++weak_alias (memcmp, bcmp) ++ ++# undef __memcmpeq ++strong_alias (memcmp, __memcmpeq) ++libc_hidden_def (__memcmpeq) ++ ++# ifdef SHARED ++__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +-- +2.33.0 +
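The memcmp.c wrapper added above relies on glibc-internal machinery (libc_ifunc_redirected, IFUNC_SELECTOR). Outside glibc, the same dispatch pattern can be written with the GCC/Clang ifunc attribute; the toy sketch below (my_memcmp and both implementations are hypothetical names, and the "SIMD" variant is only a stand-in) mirrors what the selector in ifunc-memcmp.h does: pick an implementation once, at load time, based on the reported hardware capabilities.

/* Toy illustration of ifunc dispatch using the GCC/Clang "ifunc"
   attribute instead of glibc's internal macros.  Requires an ELF
   target with GNU ifunc support (e.g. glibc).  */
#include <stddef.h>
#include <sys/auxv.h>

static int
my_memcmp_generic (const void *a, const void *b, size_t n)
{
  const unsigned char *p = a, *q = b;
  for (size_t i = 0; i < n; i++)
    if (p[i] != q[i])
      return p[i] - q[i];
  return 0;
}

/* Stand-in for a SIMD variant; a real one would use LSX/LASX code.  */
static int
my_memcmp_simd (const void *a, const void *b, size_t n)
{
  return my_memcmp_generic (a, b, n);
}

/* Resolver runs once at load time and returns the chosen implementation.  */
static int (*resolve_my_memcmp (void)) (const void *, const void *, size_t)
{
#ifdef HWCAP_LOONGARCH_LSX
  if (getauxval (AT_HWCAP) & HWCAP_LOONGARCH_LSX)
    return my_memcmp_simd;
#endif
  return my_memcmp_generic;
}

int my_memcmp (const void *a, const void *b, size_t n)
  __attribute__ ((ifunc ("resolve_my_memcmp")));

A real resolver has to stay self-contained because it runs before ordinary relocations are finished, which is part of why glibc's own selectors read GLRO(dl_hwcap) directly rather than calling out to getauxval.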
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
Added
@@ -0,0 +1,417 @@ +From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:37 +0800 +Subject: PATCH 16/29 LoongArch: Add ifunc support for memrchr{lsx, lasx} + +According to glibc memrchr microbenchmark, this implementation could reduce +the runtime as following: + +Name Percent of rutime reduced +memrchr-lasx 20%-83% +memrchr-lsx 20%-64% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++ + .../lp64/multiarch/memrchr-generic.c | 23 ++++ + .../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++ + 7 files changed, 335 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 2f4802cf..7b87bc90 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -27,5 +27,8 @@ sysdep_routines += \ + memchr-aligned \ + memchr-lsx \ + memchr-lasx \ ++ memrchr-generic \ ++ memrchr-lsx \ ++ memrchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index a567b9cf..8bd5489e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) + ) ++ ++ IFUNC_IMPL (i, name, memrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +new file mode 100644 +index 00000000..8215f9ad +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memrchr implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (generic); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c +new file mode 100644 +index 00000000..ced61ebc +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c +@@ -0,0 +1,23 @@ ++/* Generic implementation of memrchr. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define MEMRCHR __memrchr_generic ++#endif ++ ++#include <string/memrchr.c> +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S +new file mode 100644 +index 00000000..5f3e0d06 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S +@@ -0,0 +1,123 @@ ++/* Optimized memrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMRCHR ++# define MEMRCHR __memrchr_lasx ++#endif ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x3f ++ ++ bstrins.d a3, zero, 5, 0 ++ addi.d t1, t1, 1 ++ xvld xr0, a3, 0 ++ xvld xr1, a3, 32 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ xvreplgr2vr.b xr2, a1 ++ andi t4, a0, 0x3f ++ ++ srl.d t2, t3, t2 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 5, 0 ++L(loop): ++ xvld xr0, a3, -64 ++ ++ xvld xr1, a3, -32 ++ addi.d a3, a3, -64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ ++ ++ beq a0, a3, L(out) ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +new file mode 100644 +index 00000000..39a7c8b0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +@@ -0,0 +1,105 @@ ++/* Optimized memrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMRCHR __memrchr_lsx ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x1f ++ ++ bstrins.d a3, zero, 4, 0 ++ addi.d t1, t1, 1 ++ vld vr0, a3, 0 ++ vld vr1, a3, 16 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ vreplgr2vr.b vr2, a1 ++ andi t4, a0, 0x1f ++ ++ srl.d t2, t3, t2 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 4, 0 ++L(loop): ++ vld vr0, a3, -32 ++ ++ vld vr1, a3, -16 ++ addi.d a3, a3, -32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ ++ beq a0, a3, L(out) ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c +new file mode 100644 +index 00000000..8baba9ab +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c +@@ -0,0 +1,33 @@ ++/* Multiple versions of memrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memrchr __redirect_memrchr ++# include <string.h> ++# undef memrchr ++ ++# define SYMBOL_NAME memrchr ++# include "ifunc-memrchr.h" ++ ++libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); ++libc_hidden_def (__memrchr) ++weak_alias (__memrchr, memrchr) ++ ++#endif +-- +2.33.0 +
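The SUPPORT_LSX / SUPPORT_LASX checks in ifunc-memrchr.h read hwcap bits that glibc caches at startup. An application-level equivalent, useful for seeing which variant the selector would return on a given machine, is a getauxval(AT_HWCAP) probe; this sketch assumes a Linux LoongArch kernel that exposes the HWCAP_LOONGARCH_* constants in <asm/hwcap.h>:

#include <stdio.h>
#include <sys/auxv.h>
#ifdef __loongarch__
# include <asm/hwcap.h>
#endif

int main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  const char *pick = "__memrchr_generic";
#ifdef HWCAP_LOONGARCH_LASX
  if (hwcap & HWCAP_LOONGARCH_LASX)
    pick = "__memrchr_lasx";
  else if (hwcap & HWCAP_LOONGARCH_LSX)
    pick = "__memrchr_lsx";
#else
  (void) hwcap;                 /* HWCAP_LOONGARCH_* not available here */
#endif
  printf ("ifunc selector would return %s\n", pick);
  return 0;
}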
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
Added
@@ -0,0 +1,784 @@ +From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:38 +0800 +Subject: PATCH 17/29 LoongArch: Add ifunc support for memset{aligned, + unaligned, lsx, lasx} + +According to glibc memset microbenchmark test results, for LSX and LASX +versions, A few cases with length less than 8 experience performace +degradation, overall, the LASX version could reduce the runtime about +15% - 75%, LSX version could reduce the runtime about 15%-50%. + +The unaligned version uses unaligned memmory access to set data which +length is less than 64 and make address aligned with 8. For this part, +the performace is better than aligned version. Comparing with the generic +version, the performance is close when the length is larger than 128. When +the length is 8-128, the unaligned version could reduce the runtime about +30%-70%, the aligned version could reduce the runtime about 20%-50%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 4 + + .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++ + .../lp64/multiarch/ifunc-impl-list.c | 10 + + .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++ + .../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++ + 8 files changed, 688 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 7b87bc90..216886c5 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -30,5 +30,9 @@ sysdep_routines += \ + memrchr-generic \ + memrchr-lsx \ + memrchr-lasx \ ++ memset-aligned \ ++ memset-unaligned \ ++ memset-lsx \ ++ memset-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +new file mode 100644 +index 00000000..e2723873 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -0,0 +1,24 @@ ++/* Symbol rediretion for loader/static initialization code. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _DL_IFUNC_GENERIC_H ++#define _DL_IFUNC_GENERIC_H ++ ++asm ("memset = __memset_aligned"); ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 8bd5489e..37f60dde 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) + ) ++ ++ IFUNC_IMPL (i, name, memset, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +new file mode 100644 +index 00000000..1fce95b7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +@@ -0,0 +1,174 @@ ++/* Optimized memset aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMSET_NAME __memset_aligned ++#else ++# define MEMSET_NAME memset ++#endif ++ ++LEAF(MEMSET_NAME, 6) ++ move t0, a0 ++ andi a3, a0, 0x7 ++ li.w t6, 16 ++ beqz a3, L(align) ++ bltu a2, t6, L(short_data) ++ ++L(make_align): ++ li.w t8, 8 ++ sub.d t2, t8, a3 ++ pcaddi t1, 11 ++ slli.d t3, t2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++L(al7): ++ st.b a1, t0, 6 ++L(al6): ++ st.b a1, t0, 5 ++L(al5): ++ st.b a1, t0, 4 ++L(al4): ++ st.b a1, t0, 3 ++L(al3): ++ st.b a1, t0, 2 ++L(al2): ++ st.b a1, t0, 1 ++L(al1): ++ st.b a1, t0, 0 ++L(al0): ++ add.d t0, t0, t2 ++ sub.d a2, a2, t2 ++ ++L(align): ++ bstrins.d a1, a1, 15, 8 ++ bstrins.d a1, a1, 31, 16 ++ bstrins.d a1, a1, 63, 32 ++ bltu a2, t6, L(less_16bytes) ++ ++ andi a4, a2, 0x3f ++ beq a4, a2, L(less_64bytes) ++ ++ sub.d t1, a2, a4 ++ move a2, a4 ++ add.d a5, t0, t1 ++ ++L(loop_64bytes): ++ addi.d t0, t0, 64 ++ st.d a1, t0, -64 ++ st.d a1, t0, -56 ++ st.d a1, t0, -48 ++ st.d a1, t0, -40 ++ ++ st.d a1, t0, -32 ++ st.d a1, t0, -24 ++ st.d a1, t0, -16 ++ st.d a1, t0, -8 ++ bne t0, a5, L(loop_64bytes) ++ ++L(less_64bytes): ++ srai.d a4, a2, 5 ++ beqz a4, L(less_32bytes) ++ addi.d a2, a2, -32 ++ st.d a1, t0, 0 ++ ++ st.d a1, t0, 8 ++ st.d a1, t0, 16 ++ st.d a1, t0, 24 ++ addi.d t0, t0, 32 ++ ++L(less_32bytes): ++ bltu a2, t6, L(less_16bytes) ++ addi.d a2, a2, -16 ++ st.d a1, t0, 0 ++ st.d a1, t0, 8 ++ addi.d t0, t0, 16 ++ ++L(less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(less_8bytes) ++ addi.d a2, a2, -8 ++ st.d a1, t0, 0 ++ addi.d t0, t0, 8 ++ ++L(less_8bytes): ++ beqz a2, L(less_1byte) ++ srai.d a4, a2, 2 ++ beqz a4, L(less_4bytes) ++ addi.d a2, a2, -4 ++ st.w a1, t0, 0 ++ addi.d t0, t0, 4 ++ ++L(less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(less_2bytes) ++ addi.d a2, a2, -2 ++ st.h a1, t0, 0 ++ addi.d t0, t0, 2 ++ ++L(less_2bytes): ++ beqz a2, L(less_1byte) ++ st.b a1, t0, 0 ++L(less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 19 ++ slli.d t3, a2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++L(short_15): ++ st.b a1, a0, 14 ++L(short_14): ++ st.b a1, a0, 13 ++L(short_13): ++ st.b a1, a0, 12 ++L(short_12): ++ st.b a1, a0, 11 ++L(short_11): ++ st.b a1, a0, 10 ++L(short_10): ++ st.b a1, a0, 9 ++L(short_9): ++ st.b a1, a0, 8 ++L(short_8): ++ st.b a1, a0, 7 ++L(short_7): ++ st.b a1, a0, 6 ++L(short_6): ++ st.b a1, a0, 5 ++L(short_5): ++ st.b a1, a0, 4 ++L(short_4): ++ st.b a1, a0, 3 ++L(short_3): ++ st.b a1, a0, 2 ++L(short_2): ++ st.b a1, a0, 1 ++L(short_1): ++ st.b a1, a0, 0 ++L(short_0): ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +new file mode 100644 +index 00000000..041abbac +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +@@ -0,0 +1,142 @@ ++/* Optimized memset implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lasx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 32 ++ move a3, a0 ++ xvreplgr2vr.b xr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_32bytes) ++ li.d t3, 128 ++ li.d t2, 64 ++ blt t3, a2, L(long_bytes) ++ ++L(less_128bytes): ++ bgeu t2, a2, L(less_64bytes) ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a4, -32 ++ ++ xvst xr0, a4, -64 ++ jr ra ++L(less_64bytes): ++ xvst xr0, a3, 0 ++ xvst xr0, a4, -32 ++ ++ ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ beqz t0, L(less_16bytes) ++ vst vr0, a3, 0 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vstelm.d vr0, a3, 0, 0 ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ ++ beqz t0, L(less_4bytes) ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a3, 0 ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ xvst xr0, a3, 0 ++ bstrins.d a3, zero, 4, 0 ++ addi.d a3, a3, 32 ++ sub.d a2, a4, a3 ++ ++ andi t0, a2, 0xff ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++ ++L(loop_256): ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ xvst xr0, a3, 128 ++ xvst xr0, a3, 160 ++ xvst xr0, a3, 192 ++ xvst xr0, a3, 224 ++ ++ addi.d a3, a3, 256 ++ bne a3, t0, L(loop_256) ++L(long_end): ++ bltu a2, t3, L(end_less_128) ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ ++ addi.d a3, a3, 128 ++L(end_less_128): ++ bltu a2, t2, L(end_less_64) ++ addi.d a2, a2, -64 ++ xvst xr0, a3, 0 ++ ++ xvst xr0, a3, 32 ++ addi.d a3, a3, 64 ++L(end_less_64): ++ bltu a2, t1, L(end_less_32) ++ xvst xr0, a3, 0 ++ ++L(end_less_32): ++ xvst xr0, a4, -32 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +new file mode 100644 +index 00000000..3d3982aa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +@@ -0,0 +1,135 @@ ++/* Optimized memset implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lsx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 16 ++ move a3, a0 ++ vreplgr2vr.b vr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_16bytes) ++ li.d t3, 64 ++ li.d t2, 32 ++ bgeu a2, t3, L(long_bytes) ++ ++L(less_64bytes): ++ bgeu t2, a2, L(less_32bytes) ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ vst vr0, a4, -32 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_32bytes): ++ vst vr0, a3, 0 ++ vst vr0, a4, -16 ++ ++ ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ vstelm.d vr0, a3, 0, 0 ++ ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ jr ra ++ ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ vstelm.b vr0, a3, 0, 0 ++L(less_1bytes): ++ jr ra ++L(long_bytes): ++ vst vr0, a3, 0 ++ ++ bstrins.d a3, zero, 3, 0 ++ addi.d a3, a3, 16 ++ sub.d a2, a4, a3 ++ andi t0, a2, 0x7f ++ ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++L(loop_128): ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ vst vr0, a3, 64 ++ ++ ++ vst vr0, a3, 80 ++ vst vr0, a3, 96 ++ vst vr0, a3, 112 ++ addi.d a3, a3, 128 ++ ++ bne a3, t0, L(loop_128) ++L(long_end): ++ bltu a2, t3, L(end_less_64) ++ addi.d a2, a2, -64 ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ addi.d a3, a3, 64 ++ ++L(end_less_64): ++ bltu a2, t2, L(end_less_32) ++ addi.d a2, a2, -32 ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ ++ addi.d a3, a3, 32 ++L(end_less_32): ++ bltu a2, t1, L(end_less_16) ++ vst vr0, a3, 0 ++ ++L(end_less_16): ++ vst vr0, a4, -16 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +new file mode 100644 +index 00000000..f7d32039 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +@@ -0,0 +1,162 @@ ++/* Optimized memset unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMSET_NAME __memset_unaligned ++ ++#define ST_128(n) \ ++ st.d a1, a0, n; \ ++ st.d a1, a0, n+8 ; \ ++ st.d a1, a0, n+16 ; \ ++ st.d a1, a0, n+24 ; \ ++ st.d a1, a0, n+32 ; \ ++ st.d a1, a0, n+40 ; \ ++ st.d a1, a0, n+48 ; \ ++ st.d a1, a0, n+56 ; \ ++ st.d a1, a0, n+64 ; \ ++ st.d a1, a0, n+72 ; \ ++ st.d a1, a0, n+80 ; \ ++ st.d a1, a0, n+88 ; \ ++ st.d a1, a0, n+96 ; \ ++ st.d a1, a0, n+104; \ ++ st.d a1, a0, n+112; \ ++ st.d a1, a0, n+120; ++ ++LEAF(MEMSET_NAME, 6) ++ bstrins.d a1, a1, 15, 8 ++ add.d t7, a0, a2 ++ bstrins.d a1, a1, 31, 16 ++ move t0, a0 ++ ++ bstrins.d a1, a1, 63, 32 ++ srai.d t8, a2, 4 ++ beqz t8, L(less_16bytes) ++ srai.d t8, a2, 6 ++ ++ bnez t8, L(more_64bytes) ++ srai.d t8, a2, 5 ++ beqz t8, L(less_32bytes) ++ ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, a0, 16 ++ st.d a1, a0, 24 ++ ++ st.d a1, t7, -32 ++ st.d a1, t7, -24 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d t8, a2, 3 ++ beqz t8, L(less_8bytes) ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d t8, a2, 2 ++ beqz t8, L(less_4bytes) ++ st.w a1, a0, 0 ++ st.w a1, t7, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d t8, a2, 1 ++ beqz t8, L(less_2bytes) ++ st.h a1, a0, 0 ++ st.h a1, t7, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ addi.d a0, a0, 0x8 ++ st.d a1, t0, 0 ++ ++ sub.d t2, t0, a0 ++ add.d a2, t2, a2 ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ ST_128(0) ++ addi.d a0, a0, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ pcaddi t1, 20 ++ andi t5, a2, 0x78 ++ srli.d t5, t5, 1 ++ ++ sub.d t1, t1, t5 ++ jr t1 ++ ++ st.d a1, a0, 112 ++ st.d a1, a0, 104 ++ st.d a1, a0, 96 ++ st.d a1, a0, 88 ++ st.d a1, a0, 80 ++ st.d a1, a0, 72 ++ st.d a1, a0, 64 ++ st.d a1, a0, 56 ++ st.d a1, a0, 48 ++ st.d a1, a0, 40 ++ st.d a1, a0, 32 ++ st.d a1, a0, 24 ++ st.d a1, a0, 16 ++ st.d a1, a0, 8 ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ move a0, t0 ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c +new file mode 100644 +index 00000000..3ff60d8a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memset. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memset __redirect_memset ++# include <string.h> ++# undef memset ++ ++# define SYMBOL_NAME memset ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memset, memset, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memset, __GI_memset, __redirect_memset) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); ++# endif ++ ++#endif +-- +2.33.0 +
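dl-symbol-redir-ifunc.h pins memset to __memset_aligned with a toplevel asm alias so that loader and static-initialization code never depends on an ifunc having been resolved. The GCC alias attribute gives an analogous, self-contained illustration of one symbol naming another function's code; the function names here are invented for the example and do not appear in the patch:

#include <stdio.h>

int times_two (int x) { return 2 * x; }

/* Make `doubled' a second name for the same code at the symbol level --
   conceptually what `asm ("memset = __memset_aligned")' does for memset
   in code that must not rely on ifunc resolution having happened yet.  */
extern int doubled (int) __attribute__ ((alias ("times_two")));

int main (void)
{
  printf ("%d\n", doubled (21));        /* prints 42 */
  return 0;
}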
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
Added
@@ -0,0 +1,448 @@ +From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:35 +0800 +Subject: PATCH 14/29 LoongArch: Add ifunc support for rawmemchr{aligned, + lsx, lasx} + +According to glibc rawmemchr microbenchmark, A few cases tested with +char '\0' experience performance degradation due to the lasx and lsx +versions don't handle the '\0' separately. Overall, rawmemchr-lasx +implementation could reduce the runtime about 40%-80%, rawmemchr-lsx +implementation could reduce the runtime about 40%-66%, rawmemchr-aligned +implementation could reduce the runtime about 20%-40%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++ + .../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++ + .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++ + .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++ + sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++ + 7 files changed, 365 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 5d7ae7ae..64416b02 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -21,5 +21,8 @@ sysdep_routines += \ + memmove-unaligned \ + memmove-lsx \ + memmove-lasx \ ++ rawmemchr-aligned \ ++ rawmemchr-lsx \ ++ rawmemchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index c8ba87bd..3db9af14 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) + ) + ++ IFUNC_IMPL (i, name, rawmemchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +new file mode 100644 +index 00000000..a7bb4cf9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for rawmemchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +new file mode 100644 +index 00000000..9c7155ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +@@ -0,0 +1,124 @@ ++/* Optimized rawmemchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define RAWMEMCHR_NAME __rawmemchr_aligned ++#else ++# define RAWMEMCHR_NAME __rawmemchr ++#endif ++ ++LEAF(RAWMEMCHR_NAME, 6) ++ andi t1, a0, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ bstrins.d a1, a1, 15, 8 ++ ++ ld.d t0, a0, 0 ++ slli.d t1, t1, 3 ++ ori a2, a2, 0x101 ++ bstrins.d a1, a1, 31, 16 ++ ++ li.w t8, -1 ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a2, a2, 63, 32 ++ sll.d t2, t8, t1 ++ ++ sll.d t3, a1, t1 ++ orn t0, t0, t2 ++ slli.d a3, a2, 7 ++ beqz a1, L(find_zero) ++ ++ xor t0, t0, t3 ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop): ++ ld.d t0, a0, 0 ++ xor t0, t0, a1 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ xor t0, t0, a1 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t0, t3 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(loop_7bit): ++ ld.d t0, a0, 0 ++L(find_zero): ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ bnez t2, L(more_check) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ ++ beqz t2, L(loop_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop_8bit): ++ ld.d t0, a0, 0 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop_8bit) ++ ++ addi.d a0, a0, -8 ++ b L(count_pos) ++ ++END(RAWMEMCHR_NAME) ++ ++libc_hidden_builtin_def (__rawmemchr) +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +new file mode 100644 +index 00000000..be2eb59d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +@@ -0,0 +1,82 @@ ++/* Optimized rawmemchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/asm.h> ++#include <sys/regdef.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lasx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ xvreplgr2vr.b xr2, a1 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a2 ++ ++ ++ beqz t0, L(loop) ++ ctz.d t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ ctz.d t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +new file mode 100644 +index 00000000..2f6fe024 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized rawmemchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lsx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ vreplgr2vr.b vr2, a1 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a2 ++ ++ beqz t0, L(loop) ++ ctz.w t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ctz.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +new file mode 100644 +index 00000000..89c7ffff +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of rawmemchr. ++ All versions must be listed in ifunc-impl-list.c. 
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define rawmemchr __redirect_rawmemchr ++# define __rawmemchr __redirect___rawmemchr ++# include <string.h> ++# undef rawmemchr ++# undef __rawmemchr ++ ++# define SYMBOL_NAME rawmemchr ++# include "ifunc-rawmemchr.h" ++ ++libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, ++ IFUNC_SELECTOR ()); ++weak_alias (__rawmemchr, rawmemchr) ++# ifdef SHARED ++__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) ++ __attribute__((visibility ("hidden"))); ++# endif ++#endif +-- +2.33.0 +
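As a reference for what the aligned, LSX and LASX variants above must compute: rawmemchr(s, c) returns a pointer to the first byte equal to c with no length limit, so passing '\0' behaves like s + strlen(s). A minimal portable C rendering, under an illustrative name rather than glibc's internal symbols, is:

#include <stdio.h>

/* Find the first byte equal to c; the caller guarantees it exists.  */
static void *
ref_rawmemchr (const void *s, int c)
{
  const unsigned char *p = s;
  while (*p != (unsigned char) c)
    p++;
  return (void *) p;
}

int main (void)
{
  const char *s = "loongarch";
  printf ("%s\n", (char *) ref_rawmemchr (s, 'a'));   /* prints "arch" */
  return 0;
}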
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
Added
@@ -0,0 +1,499 @@ +From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:18 +0800 +Subject: PATCH 09/29 LoongArch: Add ifunc support for strcmp{aligned, lsx} + +Based on the glibc microbenchmark, strcmp-aligned implementation could +reduce the runtime 0%-10% for aligned comparison, 10%-20% for unaligned +comparison, strcmp-lsx implemenation could reduce the runtime 0%-50%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 2 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++ + .../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++ + 6 files changed, 426 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index c4dd3143..d5a500de 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -12,6 +12,8 @@ sysdep_routines += \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ ++ strcmp-aligned \ ++ strcmp-lsx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 7cec0b77..9183b7da 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) + ) + ++ IFUNC_IMPL (i, name, strcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +new file mode 100644 +index 00000000..ca26352b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strcmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +new file mode 100644 +index 00000000..f5f4f336 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -0,0 +1,179 @@ ++/* Optimized strcmp implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCMP_NAME __strcmp_aligned ++#else ++# define STRCMP_NAME strcmp ++#endif ++ ++LEAF(STRCMP_NAME, 6) ++ lu12i.w a4, 0x01010 ++ andi a2, a0, 0x7 ++ ori a4, a4, 0x101 ++ andi a3, a1, 0x7 ++ ++ bstrins.d a4, a4, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ slli.d a5, a4, 7 ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ++ ld.d t1, a1, 0 ++ slli.d t3, a2, 3 ++ sll.d t2, t7, t3 ++ orn t0, t0, t2 ++ ++ ++ orn t1, t1, t2 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ bne t0, t1, L(al_end) ++L(al_loop): ++ bnez t2, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ addi.d t8, t8, 8 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ beq t0, t1, L(al_loop) ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t3, t2 ++ ++ ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ ++ andi t1, t1, 0xff ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++L(unaligned): ++ slt a6, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a6 ++ xor a0, a0, t0 ++ ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0x7 ++ andi a3, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d a2, a2, 3 ++ ++ slli.d a3, a3, 3 ++ srl.d t0, t4, a2 ++ srl.d t1, t1, a3 ++ srl.d t5, t7, a3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sll.d t5, t7, a2 ++ ++ ++ sub.d a3, a2, a3 ++ orn t4, t4, t5 ++ sub.d a2, zero, a3 ++ sub.d t2, t4, a4 ++ ++ andn t3, a5, t4 ++ and t2, t2, t3 ++ bnez t2, L(find_zero) ++L(un_loop): ++ srl.d t5, t4, a3 ++ ++ ldx.d t4, a0, t8 ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a2 ++ ++ 
or t0, t0, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t2, t4, a4 ++ andn t3, a5, t4 ++ ++ ++ and t2, t2, t3 ++ beqz t2, L(un_loop) ++L(find_zero): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ ++ and t2, t2, t3 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ srl.d t0, t4, a3 ++ ++L(not_equal): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ xor t3, t0, t1 ++ ++ or t2, t2, t3 ++L(un_end): ++ ctz.d t3, t2 ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ ++ ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ ++ sub.d t3, t1, t0 ++ masknez t0, t2, a6 ++ maskeqz t1, t3, a6 ++ or a0, t0, t1 ++ ++ jr ra ++END(STRCMP_NAME) ++ ++libc_hidden_builtin_def (STRCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +new file mode 100644 +index 00000000..2e177a38 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -0,0 +1,165 @@ ++/* Optimized strcmp implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRCMP __strcmp_lsx ++ ++LEAF(STRCMP, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a2, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ andi a3, a1, 0xf ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ bstrins.d a1, zero, 3, 0 ++ vld vr0, a0, 0 ++ ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr3, a2 ++ vslt.b vr2, vr2, vr3 ++ vseq.b vr3, vr0, vr1 ++ ++ vmin.bu vr3, vr0, vr3 ++ vor.v vr3, vr3, vr2 ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(al_out) ++ ++ ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++ ++L(al_out): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(unaligned): ++ slt a4, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a4 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0xf ++ andi a3, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ bstrins.d a1, zero, 3, 0 ++ vld vr3, a0, 0 ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr4, a2 ++ ++ vreplgr2vr.b vr5, a3 ++ vslt.b vr7, vr2, vr5 ++ vsub.b vr5, vr5, vr4 ++ vaddi.bu vr6, vr2, 16 ++ ++ ++ vsub.b vr6, vr6, vr5 ++ vshuf.b vr0, vr3, vr3, vr6 ++ vor.v vr0, vr0, vr7 ++ vor.v vr1, vr1, vr7 ++ ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ vslt.b vr4, vr2, vr4 ++ ++ vor.v vr0, vr3, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(find_zero) ++ nop ++ ++L(un_loop): ++ vld vr3, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ ++ vshuf.b vr0, vr3, vr0, vr6 ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ ++ vsetanyeqz.b fcc0, vr3 ++ vor.v vr0, vr3, vr3 ++ bceqz fcc0, L(un_loop) ++L(find_zero): ++ vmin.bu vr5, vr1, vr5 ++ ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(ret0) ++ vld vr1, a1, 16 ++ vshuf.b vr0, vr3, vr3, vr6 ++ ++ vseq.b vr5, vr0, vr1 ++L(not_equal): ++ vmin.bu vr5, vr0, vr5 ++L(un_end): ++ vseqi.b vr5, vr5, 0 ++ vfrstpi.b vr5, vr5, 0 ++ ++ ++ vshuf.b vr0, vr0, vr0, vr5 ++ vshuf.b vr1, vr1, vr1, vr5 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t3, t0, t1 ++ sub.d t4, t1, t0 ++ masknez t0, t3, a4 ++ maskeqz t1, t4, a4 ++ ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c +new file mode 100644 +index 00000000..6f249c0b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcmp __redirect_strcmp ++# include <string.h> ++# undef strcmp ++ ++# define SYMBOL_NAME strcmp ++# include "ifunc-strcmp.h" ++ ++libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp); ++# endif ++#endif +-- +2.33.0 +
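The strcmp patch above routes callers through libc_ifunc_redirected, so the choice between __strcmp_aligned and __strcmp_lsx is made once, when the dynamic loader resolves the symbol. The small C program below is only a sketch of that mechanism using GCC's ifunc attribute: the function names and the hwcap bit are assumptions made for illustration, not code from the patch, and an application-level resolver can probe getauxval, which is not how glibc itself reads hwcap.

/* Sketch of IFUNC dispatch; names and the hwcap bit are assumed for
   illustration and are not taken from the patch above.  */
#include <string.h>
#include <sys/auxv.h>

typedef int strcmp_fn (const char *, const char *);

/* Stand-ins for __strcmp_aligned and __strcmp_lsx.  */
static int
strcmp_generic (const char *a, const char *b)
{
  return strcmp (a, b);
}

static int
strcmp_simd (const char *a, const char *b)
{
  return strcmp (a, b);
}

/* Assumed LSX bit; the authoritative value is in the kernel's hwcap header.  */
#define HWCAP_LSX_ASSUMED (1UL << 4)

/* The resolver runs once, while relocations are processed, and returns the
   implementation that every later call will use.  */
static strcmp_fn *
resolve_my_strcmp (void)
{
  return (getauxval (AT_HWCAP) & HWCAP_LSX_ASSUMED)
         ? strcmp_simd : strcmp_generic;
}

int my_strcmp (const char *, const char *)
  __attribute__ ((ifunc ("resolve_my_strcmp")));

int
main (void)
{
  return my_strcmp ("abc", "abd") < 0 ? 0 : 1;
}

Once resolved, my_strcmp behaves like strcmp with the body picked at startup; the SUPPORT_LSX check in the ifunc selector headers of these patches makes the same shape of decision inside glibc.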
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
Added
@@ -0,0 +1,1099 @@ +From 351086591d938aaf884d475261ae96ec5da00384 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:34:59 +0800 +Subject: PATCH 22/29 LoongArch: Add ifunc support for strcpy, + stpcpy{aligned, unaligned, lsx, lasx} + +According to glibc strcpy and stpcpy microbenchmark test results(changed +to use generic_strcpy and generic_stpcpy instead of strlen + memcpy), +comparing with the generic version, this implementation could reduce the +runtime as following: + +Name Percent of rutime reduced +strcpy-aligned 8%-45% +strcpy-unaligned 8%-48%, comparing with the aligned version, unaligned + version takes less instructions to copy the tail of data + which length is less than 8. it also has better performance + in case src and dest cannot be both aligned with 8bytes +strcpy-lsx 20%-80% +strcpy-lasx 15%-86% +stpcpy-aligned 6%-43% +stpcpy-unaligned 8%-48% +stpcpy-lsx 10%-80% +stpcpy-lasx 10%-87% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 8 + + .../lp64/multiarch/ifunc-impl-list.c | 18 ++ + .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 +++ + .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 ++ + .../lp64/multiarch/stpcpy-unaligned.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 ++++ + .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 ++++++++++++++++ + .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++++++++++++++ + .../lp64/multiarch/strcpy-unaligned.S | 138 +++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 +++ + 12 files changed, 963 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 360a6718..39550bea 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -16,6 +16,14 @@ sysdep_routines += \ + strcmp-lsx \ + strncmp-aligned \ + strncmp-lsx \ ++ strcpy-aligned \ ++ strcpy-unaligned \ ++ strcpy-lsx \ ++ strcpy-lasx \ ++ stpcpy-aligned \ ++ stpcpy-unaligned \ ++ stpcpy-lsx \ ++ stpcpy-lasx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index e397d58c..39a14f1d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) + ) + ++ IFUNC_IMPL (i, name, strcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, 
__strcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, stpcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +new file mode 100644 +index 00000000..1f763db6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +@@ -0,0 +1,27 @@ ++/* stpcpy-aligned implementation is in strcpy-aligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define STPCPY __stpcpy_aligned ++#else ++# define STPCPY __stpcpy ++#endif ++ ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-aligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +new file mode 100644 +index 00000000..13d6c953 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lasx implementation is in strcpy-lasx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lasx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +new file mode 100644 +index 00000000..e0f17ab5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lsx implementation is in strcpy-lsx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lsx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +new file mode 100644 +index 00000000..cc2f9712 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +@@ -0,0 +1,22 @@ ++/* stpcpy-unaligned implementation is in strcpy-unaligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_unaligned ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-unaligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +new file mode 100644 +index 00000000..d4860d7a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +@@ -0,0 +1,42 @@ ++/* Multiple versions of stpcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define stpcpy __redirect_stpcpy ++# define __stpcpy __redirect___stpcpy ++# define NO_MEMPCPY_STPCPY_REDIRECT ++# define __NO_STRING_INLINES ++# include <string.h> ++# undef stpcpy ++# undef __stpcpy ++ ++# define SYMBOL_NAME stpcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); ++ ++weak_alias (__stpcpy, stpcpy) ++# ifdef SHARED ++__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +new file mode 100644 +index 00000000..4ed539fd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +@@ -0,0 +1,202 @@ ++/* Optimized strcpy stpcpy aligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# ifndef STRCPY ++# define STRCPY __strcpy_aligned ++# endif ++#else ++# ifndef STRCPY ++# define STRCPY strcpy ++# endif ++#endif ++ ++LEAF(STRCPY, 6) ++ andi a3, a0, 0x7 ++ move a2, a0 ++ beqz a3, L(dest_align) ++ sub.d a5, a1, a3 ++ addi.d a5, a5, 8 ++ ++L(make_dest_align): ++ ld.b t0, a1, 0 ++ addi.d a1, a1, 1 ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++ beqz t0, L(al_out) ++ ++ bne a1, a5, L(make_dest_align) ++ ++L(dest_align): ++ andi a4, a1, 7 ++ bstrins.d a1, zero, 2, 0 ++ ++ lu12i.w t5, 0x1010 ++ ld.d t0, a1, 0 ++ ori t5, t5, 0x101 ++ bstrins.d t5, t5, 63, 32 ++ ++ slli.d t6, t5, 0x7 ++ bnez a4, L(unalign) ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ bnez t3, L(al_end) ++ ++L(al_loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ ++ addi.d a1, a1, 8 ++ addi.d a2, a2, 8 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ beqz t3, L(al_loop) ++ ++L(al_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++ andi a3, t1, 8 ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++ ++L(al_end_8): ++ beqz a3, L(al_end_4) ++ st.d t0, a2, 0 ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, 7 ++#endif ++ jr ra ++L(al_end_4): ++ beqz a4, L(al_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(al_end_2): ++ beqz a5, L(al_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(al_end_1): ++ beqz a6, L(al_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(al_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++ ++ .align 4 ++L(unalign): ++ slli.d a5, a4, 3 ++ li.d t1, -1 ++ sub.d a6, zero, a5 ++ ++ srl.d a7, t0, a5 ++ sll.d t7, t1, a6 ++ ++ or t0, a7, t7 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(un_end) ++ ++ ld.d t4, a1, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ bnez t3, L(un_end_with_remaining) ++ ++L(un_loop): ++ srl.d a7, t4, a5 ++ ++ ld.d t4, a1, 16 ++ addi.d a1, a1, 8 ++ ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ beqz t3, L(un_loop) ++ ++L(un_end_with_remaining): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ sub.d t1, t1, a4 ++ ++ blt t1, zero, L(un_end_less_8) ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ beqz t1, L(un_out) ++ srl.d t0, t4, a5 ++ b L(un_end_less_8) ++ ++L(un_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++L(un_end_less_8): ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++L(un_end_4): ++ beqz a4, L(un_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(un_end_2): ++ beqz a5, L(un_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(un_end_1): ++ beqz a6, L(un_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(un_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +new file mode 100644 +index 00000000..c2825612 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +@@ -0,0 +1,215 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lasx ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ ori t8, zero, 0xfe0 ++ andi t0, a1, 0xfff ++ li.d t7, -1 ++ move a2, a0 ++ ++ bltu t8, t0, L(page_cross_start) ++L(start_entry): ++ xvld xr0, a1, 0 ++ li.d t0, 32 ++ andi t1, a2, 0x1f ++ ++ xvsetanyeqz.b fcc0, xr0 ++ sub.d t0, t0, t1 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ ++ xvst xr0, a2, 0 ++ andi a3, a1, 0x1f ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ ++ ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ xvst xr0, a2, 0 ++ ++ xvld xr0, a1, 32 ++ addi.d a2, a2, 32 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++ nop ++ ++L(page_cross_start): ++ move a4, a1 ++ bstrins.d a4, zero, 4, 0 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ ++ beq t0, t7, L(start_entry) ++ b L(tail) ++L(unaligned): ++ andi t0, a1, 0xfff ++ bltu t8, t0, L(un_page_cross) ++ ++ ++L(un_start_entry): ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(un_end) ++ addi.d a1, a1, 32 ++ ++L(un_loop): ++ xvst xr0, a2, 0 ++ andi t0, a1, 0xfff ++ addi.d a2, a2, 32 ++ bltu t8, t0, L(page_cross_loop) ++ ++L(un_loop_entry): ++ xvld xr0, a1, 0 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(un_loop) ++ ++ addi.d a1, a1, -32 ++L(un_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++L(un_tail): ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++L(un_page_cross): ++ sub.d a4, a1, a3 ++ ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_start_entry) ++ b L(un_tail) ++ ++ ++L(page_cross_loop): ++ sub.d a4, a1, a3 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_loop_entry) ++ ++ b L(un_tail) ++L(end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++L(tail): ++ cto.w t0, t0 ++ add.d dstend, a2, t0 ++ add.d a5, a1, t0 ++ ++L(less_32): ++ srli.d t1, t0, 4 ++ beqz t1, L(less_16) ++ vld vr0, a1, 0 ++ vld vr1, a5, -15 ++ ++ vst vr0, a2, 0 ++ vst vr1, 
dstend, -15 ++ jr ra ++L(less_16): ++ srli.d t1, t0, 3 ++ ++ beqz t1, L(less_8) ++ ld.d t2, a1, 0 ++ ld.d t3, a5, -7 ++ st.d t2, a2, 0 ++ ++ st.d t3, dstend, -7 ++ jr ra ++L(less_8): ++ li.d t1, 3 ++ bltu t0, t1, L(less_3) ++ ++ ld.w t2, a1, 0 ++ ld.w t3, a5, -3 ++ st.w t2, a2, 0 ++ st.w t3, dstend, -3 ++ ++ jr ra ++L(less_3): ++ beqz t0, L(zero_byte) ++ ld.h t2, a1, 0 ++ ++ st.h t2, a2, 0 ++L(zero_byte): ++ st.b zero, dstend, 0 ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +new file mode 100644 +index 00000000..fc2498f7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +@@ -0,0 +1,212 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lsx ++# endif ++ ++LEAF(STRCPY, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a4, a1, 0xf ++ vld vr1, t0, %pc_lo12(L(INDEX)) ++ move a2, a0 ++ ++ beqz a4, L(load_start) ++ xor t0, a1, a4 ++ vld vr0, t0, 0 ++ vreplgr2vr.b vr2, a4 ++ ++ vadd.b vr2, vr2, vr1 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(end) ++ ++L(load_start): ++ vld vr0, a1, 0 ++ li.d t1, 16 ++ andi a3, a2, 0xf ++ vsetanyeqz.b fcc0, vr0 ++ ++ ++ sub.d t0, t1, a3 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ vst vr0, a2, 0 ++ ++ andi a3, a1, 0xf ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ vld vr0, a1, 0 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ vst vr0, a2, 0 ++ vld vr0, a1, 16 ++ ++ addi.d a2, a2, 16 ++ addi.d a1, a1, 16 ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(al_loop) ++ ++ ++L(al_end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, -15 ++# ifdef USE_AS_STPCPY ++ add.d a0, a2, t0 ++ vst vr0, a0, -15 ++# else ++ add.d a2, a2, t0 ++ vst vr0, a2, -15 ++# endif ++ jr ra ++ ++L(end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ addi.d t0, t0, 1 ++ ++L(end_16): ++ andi t1, t0, 16 ++ beqz t1, L(end_8) ++ vst vr0, a2, 0 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 15 ++# endif ++ jr ra ++ ++L(end_8): ++ andi t2, t0, 8 ++ andi t3, t0, 4 ++ andi t4, t0, 2 ++ andi t5, t0, 1 ++ ++ beqz t2, L(end_4) ++ vstelm.d vr0, a2, 0, 0 ++ addi.d a2, a2, 8 ++ vbsrl.v vr0, vr0, 8 ++ ++L(end_4): ++ beqz t3, L(end_2) ++ vstelm.w vr0, a2, 0, 0 ++ addi.d a2, a2, 4 ++ vbsrl.v vr0, vr0, 4 ++ ++L(end_2): ++ beqz t4, L(end_1) ++ vstelm.h vr0, a2, 0, 0 ++ addi.d a2, a2, 2 ++ vbsrl.v vr0, vr0, 2 ++ ++ ++L(end_1): ++ beqz t5, L(out) ++ vstelm.b vr0, a2, 0, 0 ++ addi.d a2, a2, 1 
++L(out): ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++# endif ++ jr ra ++ ++ .align 4 ++L(unaligned): ++ bstrins.d a1, zero, 3, 0 ++ vld vr2, a1, 0 ++ vreplgr2vr.b vr3, a3 ++ vslt.b vr4, vr1, vr3 ++ ++ vor.v vr0, vr2, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_first_end) ++ vld vr0, a1, 16 ++ ++ vadd.b vr3, vr3, vr1 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ ++ vor.v vr2, vr0, vr0 ++ addi.d a1, a1, 16 ++L(un_loop): ++ vld vr0, a1, 16 ++ vst vr4, a2, 0 ++ ++ addi.d a2, a2, 16 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ vld vr2, a1, 32 ++ vst vr4, a2, 0 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, 16 ++ ++ vshuf.b vr4, vr2, vr0, vr3 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(un_loop) ++ vor.v vr0, vr2, vr2 ++ ++ ++ addi.d a1, a1, -16 ++L(un_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, 1f ++ vst vr4, a2, 0 ++ ++1: ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, 1 ++ add.d a2, a2, t0 ++ sub.d a2, a2, a3 ++ vst vr0, a2, 1 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 16 ++# endif ++ jr ra ++L(un_first_end): ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ b 1b ++END(STRCPY) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +new file mode 100644 +index 00000000..9e31883b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +@@ -0,0 +1,138 @@ ++/* Optimized strcpy unaligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_unaligned ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ lu12i.w t5, 0x01010 ++ li.w t0, 0xff8 ++ ori t5, t5, 0x101 ++ andi t1, a1, 0xfff ++ ++ bstrins.d t5, t5, 63, 32 ++ move a2, a0 ++ slli.d t6, t5, 7 ++ bltu t0, t1, L(page_cross) ++ ++L(start_entry): ++ ld.d t0, a1, 0 ++ li.d t3, 8 ++ andi a3, a1, 0x7 ++ sub.d t1, t0, t5 ++ ++ andn t2, t6, t0 ++ sub.d t3, t3, a3 ++ and t1, t1, t2 ++ bnez t1, L(end) ++ ++ ++ add.d a1, a1, t3 ++ st.d t0, a2, 0 ++ add.d a2, a2, t3 ++ ld.d t0, a1, 0 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ bnez t1, L(long_end) ++ ++L(loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ addi.d a2, a2, 8 ++ addi.d a1, a1, 8 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(loop) ++ ++ ++L(long_end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a1, a1, t1 ++ ld.d t0, a1, -7 ++ ++ add.d dstend, a2, t1 ++ st.d t0, dstend, -7 ++ jr ra ++ nop ++ ++L(end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a3, a1, t1 ++ add.d dstend, a2, t1 ++ ++L(less_8): ++ li.d t0, 3 ++ bltu t1, t0, L(less_3) ++ ld.w t1, a1, 0 ++ ld.w t2, a3, -3 ++ ++ ++ st.w t1, a2, 0 ++ st.w t2, dstend, -3 ++ jr ra ++L(less_3): ++ beqz t1, L(zero_bytes) ++ ++ ld.h t1, a1, 0 ++ st.h t1, a2, 0 ++L(zero_bytes): ++ st.b zero, dstend, 0 ++ jr ra ++ ++L(page_cross): ++ move a4, a1 ++ bstrins.d a4, zero, 2, 0 ++ ld.d t0, a4, 0 ++ li.d t3, -1 ++ ++ slli.d t4, a1, 3 ++ srl.d t3, t3, t4 ++ srl.d t0, t0, t4 ++ orn t0, t0, t3 ++ ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(start_entry) ++ ++ b L(end) ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c +new file mode 100644 +index 00000000..46afd068 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcpy __redirect_strcpy ++# include <string.h> ++# undef strcpy ++ ++# define SYMBOL_NAME strcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); ++# endif ++#endif +-- +2.33.0 +
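The commit message above benchmarks the new strcpy/stpcpy variants against generic versions rather than the old strlen + memcpy composition; whichever baseline is used, every variant has to keep the semantics sketched below (plain C, simplified, not the glibc generic code). The only observable difference between the two entry points is the return value, which is why the patch builds stpcpy from the strcpy bodies under USE_AS_STPCPY.

/* Reference semantics only; simplified, not the glibc generic code.  */
#include <string.h>

char *
ref_strcpy (char *dst, const char *src)
{
  size_t n = strlen (src) + 1;   /* copy the terminating NUL as well */
  memcpy (dst, src, n);
  return dst;                    /* strcpy returns the destination */
}

char *
ref_stpcpy (char *dst, const char *src)
{
  size_t n = strlen (src);
  memcpy (dst, src, n + 1);
  return dst + n;                /* stpcpy returns a pointer to the copied NUL */
}

In the assembly above, this difference shows up only in the USE_AS_STPCPY blocks that adjust a0 before the final jr ra.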
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
Added
@@ -0,0 +1,583 @@ +From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:19 +0800 +Subject: PATCH 10/29 LoongArch: Add ifunc support for strncmp{aligned, lsx} + +Based on the glibc microbenchmark, only a few short inputs with this +strncmp-aligned and strncmp-lsx implementation experience performance +degradation, overall, strncmp-aligned could reduce the runtime 0%-10% +for aligned comparision, 10%-25% for unaligend comparision, strncmp-lsx +could reduce the runtime about 0%-60%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 2 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++ + .../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++ + 6 files changed, 508 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index d5a500de..5d7ae7ae 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -14,6 +14,8 @@ sysdep_routines += \ + strchrnul-lasx \ + strcmp-aligned \ + strcmp-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 9183b7da..c8ba87bd 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) + ) + ++ IFUNC_IMPL (i, name, strncmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +new file mode 100644 +index 00000000..1a7dc36b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strncmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +new file mode 100644 +index 00000000..e2687fa7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -0,0 +1,218 @@ ++/* Optimized strncmp implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRNCMP __strncmp_aligned ++#else ++# define STRNCMP strncmp ++#endif ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ lu12i.w a5, 0x01010 ++ andi a3, a0, 0x7 ++ ori a5, a5, 0x101 ++ ++ andi a4, a1, 0x7 ++ bstrins.d a5, a5, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ addi.d a2, a2, -1 ++ slli.d a6, a5, 7 ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ ++ sub.d t5, t8, a3 ++ srl.d t3, t7, t2 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ orn t0, t0, t3 ++ orn t1, t1, t3 ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ and t2, t2, t3 ++ bne t0, t1, L(al_end) ++ sltu t4, a2, t5 ++ sub.d a2, a2, t5 ++ ++L(al_loop): ++ or t4, t2, t4 ++ bnez t4, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ ++ addi.d t8, t8, 8 ++ sltui t4, a2, 8 ++ addi.d a2, a2, -8 ++ sub.d t2, t0, a5 ++ ++ andn t3, a6, t0 ++ and t2, t2, t3 ++ beq t0, t1, L(al_loop) ++ addi.d a2, a2, 8 ++ ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t2, t2 ++ srli.d t4, t2, 3 ++ ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ ++ ++ andi t1, t1, 0xff ++ sltu t2, a2, t4 ++ sub.d a0, t0, t1 ++ masknez a0, a0, t2 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ ++L(unaligned): ++ slt a7, a4, a3 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a7 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ slli.d t3, a4, 3 ++ srl.d t5, t7, t3 ++ srl.d t0, t4, t2 ++ srl.d t1, t1, t3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t6, t8, a4 ++ 
++ sub.d a4, t2, t3 ++ sll.d t2, t7, t2 ++ sub.d t5, t8, a3 ++ orn t4, t4, t2 ++ ++ ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltu t7, a2, t5 ++ and t2, t2, t3 ++ ++ sub.d a3, zero, a4 ++ or t2, t2, t7 ++ bnez t2, L(un_end) ++ sub.d t7, t5, t6 ++ ++ sub.d a2, a2, t5 ++ sub.d t6, t8, t7 ++L(un_loop): ++ srl.d t5, t4, a4 ++ ldx.d t4, a0, t8 ++ ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a3 ++ or t0, t0, t5 ++ ++ ++ bne t0, t1, L(loop_not_equal) ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltui t5, a2, 8 ++ ++ and t2, t2, t3 ++ addi.d a2, a2, -8 ++ or t3, t2, t5 ++ beqz t3, L(un_loop) ++ ++ addi.d a2, a2, 8 ++L(un_end): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ sltu t5, a2, t6 ++ ++ and t2, t2, t3 ++ or t2, t2, t5 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ ++ ++ srl.d t0, t4, a4 ++ sub.d a2, a2, t6 ++L(not_equal): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ xor t4, t0, t1 ++ and t2, t2, t3 ++ or t2, t2, t4 ++ ctz.d t2, t2 ++ ++ bstrins.d t2, zero, 2, 0 ++ srli.d t4, t2, 3 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ ++ ++ masknez t0, t2, a7 ++ maskeqz t1, t3, a7 ++ sltu t2, a2, t4 ++ or a0, t0, t1 ++ ++ masknez a0, a0, t2 ++ jr ra ++L(loop_not_equal): ++ add.d a2, a2, t7 ++ b L(not_equal) ++END(STRNCMP) ++ ++libc_hidden_builtin_def (STRNCMP) +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +new file mode 100644 +index 00000000..0b4eee2a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -0,0 +1,208 @@ ++/* Optimized strncmp implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNCMP __strncmp_lsx ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ li.d t2, 16 ++ bne a3, a4, L(unaligned) ++ xor t0, a0, a3 ++ ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ vld vr1, t1, 0 ++ vreplgr2vr.b vr3, a3 ++ ++ ++ sub.d t2, t2, a3 ++ vadd.b vr3, vr3, vr2 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ bgeu t2, a2, L(al_early_end) ++ vsetanyeqz.b fcc0, vr3 ++ ++ bcnez fcc0, L(al_end) ++ add.d a3, a0, a2 ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 3, 0 ++ ++ sub.d a2, a3, a4 ++L(al_loop): ++ vld vr0, t0, 16 ++ vld vr1, t1, 16 ++ addi.d t0, t0, 16 ++ ++ ++ addi.d t1, t1, 16 ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ beq t0, a4, L(al_early_end) ++ ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++L(al_early_end): ++ vreplgr2vr.b vr4, a2 ++ vslt.b vr4, vr2, vr4 ++ ++ ++ vorn.v vr3, vr3, vr4 ++ b L(al_end) ++L(unaligned): ++ slt a5, a3, a4 ++ xor t0, a0, a1 ++ ++ maskeqz t0, t0, a5 ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0xf ++ ++ andi a4, a1, 0xf ++ xor t0, a0, a3 ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ ++ vld vr3, t1, 0 ++ sub.d t2, t2, a3 ++ vreplgr2vr.b vr4, a3 ++ vreplgr2vr.b vr5, a4 ++ ++ ++ vaddi.bu vr6, vr2, 16 ++ vsub.b vr7, vr4, vr5 ++ vsub.b vr6, vr6, vr7 ++ vadd.b vr4, vr2, vr4 ++ ++ vshuf.b vr1, vr3, vr3, vr6 ++ vshuf.b vr0, vr7, vr0, vr4 ++ vshuf.b vr1, vr7, vr1, vr4 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ bgeu t2, a2, L(un_early_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(un_end) ++ ++ add.d a6, a0, a2 ++ vslt.b vr5, vr2, vr5 ++ addi.d a7, a6, -1 ++ vor.v vr3, vr3, vr5 ++ ++ ++ bstrins.d a7, zero, 3, 0 ++ sub.d a2, a6, a7 ++L(un_loop): ++ vld vr0, t0, 16 ++ addi.d t0, t0, 16 ++ ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(has_zero) ++ beq t0, a7, L(end_with_len) ++ vor.v vr1, vr3, vr3 ++ ++ vld vr3, t1, 16 ++ addi.d t1, t1, 16 ++ vshuf.b vr1, vr3, vr1, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vsetanyeqz.b fcc0, vr4 ++ bceqz fcc0, L(un_loop) ++L(un_end): ++ vseqi.b vr4, vr4, 0 ++ ++ ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t2, a5 ++ ++ maskeqz t1, t3, a5 ++ or a0, t0, t1 ++ jr ra ++L(has_zero): ++ vshuf.b vr1, vr3, vr3, vr6 ++ ++ vseq.b vr4, vr0, vr1 ++ vmin.bu vr4, vr0, vr4 ++ bne t0, a7, L(un_end) ++L(un_early_end): ++ vreplgr2vr.b vr5, a2 ++ ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ b L(un_end) ++L(end_with_len): ++ sub.d a6, a3, a4 ++ ++ bgeu a6, a2, 1f ++ vld vr4, t1, 16 ++1: ++ vshuf.b vr1, vr4, vr3, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vreplgr2vr.b vr5, a2 ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ ++ b L(un_end) ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRNCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRNCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c 
b/sysdeps/loongarch/lp64/multiarch/strncmp.c +new file mode 100644 +index 00000000..af6d0bc4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strncmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strncmp __redirect_strncmp ++# include <string.h> ++# undef strncmp ++ ++# define SYMBOL_NAME strncmp ++# include "ifunc-strncmp.h" ++ ++libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp); ++# endif ++#endif +-- +2.33.0 +
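ifunc-strncmp.h above returns __strncmp_lsx only when SUPPORT_LSX holds and otherwise falls back to __strncmp_aligned; both have to honour the usual strncmp contract, restated below as a short C reference (illustration only, not the glibc code): at most n bytes are compared, bytes are compared as unsigned char, and the comparison stops at the first NUL.

/* Reference strncmp behaviour; simplified, not the glibc code.  */
#include <stddef.h>

int
ref_strncmp (const char *s1, const char *s2, size_t n)
{
  for (; n > 0; s1++, s2++, n--)
    {
      unsigned char c1 = (unsigned char) *s1;
      unsigned char c2 = (unsigned char) *s2;
      if (c1 != c2)
        return c1 - c2;          /* sign of the difference of unsigned bytes */
      if (c1 == '\0')
        return 0;                /* both strings end before n bytes */
    }
  return 0;                      /* first n bytes are equal */
}

The beqz a2, L(ret0) at the top of both assembly versions is the n == 0 case of this loop.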
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
Added
@@ -0,0 +1,465 @@ +From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:17 +0800 +Subject: PATCH 08/29 LoongArch: Add ifunc support for strnlen{aligned, lsx, + lasx} + +Based on the glibc microbenchmark, strnlen-aligned implementation could +reduce the runtime more than 10%, strnlen-lsx implementation could reduce +the runtime about 50%-78%, strnlen-lasx implementation could reduce the +runtime about 50%-88%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++ + .../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++ + 7 files changed, 382 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index afa51041..c4dd3143 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -3,6 +3,9 @@ sysdep_routines += \ + strlen-aligned \ + strlen-lsx \ + strlen-lasx \ ++ strnlen-aligned \ ++ strnlen-lsx \ ++ strnlen-lasx \ + strchr-aligned \ + strchr-lsx \ + strchr-lasx \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 25eb96b0..7cec0b77 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) + ) + ++ IFUNC_IMPL (i, name, strnlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) ++ ) ++ + IFUNC_IMPL (i, name, strchr, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +new file mode 100644 +index 00000000..5cf89810 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +@@ -0,0 +1,41 @@ ++/* Common definition for strnlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +new file mode 100644 +index 00000000..b900430a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -0,0 +1,102 @@ ++/* Optimized strnlen implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRNLEN __strnlen_aligned ++#else ++# define STRNLEN __strnlen ++#endif ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(out) ++ lu12i.w a2, 0x01010 ++ andi t1, a0, 0x7 ++ move t4, a0 ++ ++ bstrins.d a0, zero, 2, 0 ++ ori a2, a2, 0x101 ++ li.w t0, -1 ++ ld.d t2, a0, 0 ++ ++ slli.d t3, t1, 3 ++ bstrins.d a2, a2, 63, 32 ++ li.w t5, 8 ++ slli.d a3, a2, 7 ++ ++ sub.w t1, t5, t1 ++ sll.d t0, t0, t3 ++ orn t2, t2, t0 ++ sub.d t0, t2, a2 ++ ++ ++ andn t3, a3, t2 ++ and t0, t0, t3 ++ bnez t0, L(count_pos) ++ sub.d t5, a1, t1 ++ ++ bgeu t1, a1, L(out) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t2, a0, 0 ++ sub.d t0, t2, a2 ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 9 ++ and t0, t0, t1 ++ or t7, t0, t6 ++ ++ bnez t7, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t0, t2, a2 ++ ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 17 ++ and t0, t0, t1 ++ addi.d t5, t5, -16 ++ ++ or t7, t0, t6 ++ beqz t7, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t1, t0 ++ ++ sub.d a0, a0, t4 ++ srli.d t1, t1, 3 ++ add.d a0, t1, a0 ++ sltu t0, a0, a1 ++ ++ masknez t1, a1, t0 ++ maskeqz a0, a0, t0 ++ or a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ move a0, a1 ++ jr ra ++END(STRNLEN) ++ ++weak_alias (STRNLEN, strnlen) ++libc_hidden_builtin_def (STRNLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +new file mode 100644 +index 00000000..2c03d3d9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -0,0 +1,100 @@ ++/* Optimized strnlen implementation using loongarch LASX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lasx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x3f ++ li.d t3, 65 ++ sub.d a2, a0, t1 ++ ++ xvld xr0, a2, 0 ++ xvld xr1, a2, 32 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 5, 0 ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ beq a0, a4, L(out) ++ ++ xvmin.bu xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bceqz fcc0, L(loop) ++L(out): ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ cto.d t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +new file mode 100644 +index 00000000..b769a895 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -0,0 +1,89 @@ ++/* Optimized strnlen implementation using loongarch LSX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lsx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x1f ++ li.d t3, 33 ++ sub.d a2, a0, t1 ++ ++ vld vr0, a2, 0 ++ vld vr1, a2, 16 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 4, 0 ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ beq a0, a4, L(out) ++ ++ vmin.bu vr2, vr0, vr1 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++L(out): ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_builtin_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c +new file mode 100644 +index 00000000..38b7a25a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strnlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strnlen __redirect_strnlen ++# define __strnlen __redirect___strnlen ++# include <string.h> ++# undef __strnlen ++# undef strnlen ++ ++# define SYMBOL_NAME strnlen ++# include "ifunc-strnlen.h" ++ ++libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); ++weak_alias (__strnlen, strnlen); ++# ifdef SHARED ++__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); ++__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) ++ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); ++# endif ++#endif +-- +2.33.0 +
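The __strnlen_aligned code above scans eight bytes per iteration and detects a NUL with the classic zero-byte bit trick: a2 holds 0x0101010101010101, a3 holds that value shifted left by 7 (0x8080808080808080), and the sub.d/andn/and sequence yields a nonzero result exactly when the loaded word contains a zero byte, whose position ctz.d then recovers. A C rendering of that test follows, as an illustration only (the same constants appear as t5/t6 in strcpy-aligned.S).

/* The zero-byte test used by the aligned variants, in C; illustration only.  */
#include <stdint.h>
#include <stdio.h>

static int
word_has_zero_byte (uint64_t w)
{
  const uint64_t ones  = 0x0101010101010101ULL;  /* a2 in __strnlen_aligned */
  const uint64_t highs = 0x8080808080808080ULL;  /* a3 = ones << 7 */
  /* (w - ones) sets the 0x80 bit of a byte that was zero, and ~w & highs
     keeps that bit only where the original byte's high bit was clear, so the
     intersection is nonzero iff some byte of w is 0x00.  */
  return ((w - ones) & ~w & highs) != 0;
}

int
main (void)
{
  /* Little-endian loads of "abcdefgh" and of the same word with a NUL.  */
  printf ("%d\n", word_has_zero_byte (0x6867666564636261ULL));  /* prints 0 */
  printf ("%d\n", word_has_zero_byte (0x6867006564636261ULL));  /* prints 1 */
  return 0;
}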
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
Added
@@ -0,0 +1,670 @@ +From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:35:00 +0800 +Subject: PATCH 23/29 LoongArch: Add ifunc support for strrchr{aligned, lsx, + lasx} + +According to glibc strrchr microbenchmark test results, this implementation +could reduce the runtime time as following: + +Name Percent of rutime reduced +strrchr-lasx 10%-50% +strrchr-lsx 0%-50% +strrchr-aligned 5%-50% + +Generic strrchr is implemented by function strlen + memrchr, the lasx version +will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, +the lsx version will compare with generic strrchr implemented by strlen-lsx + +memrchr-lsx, the aligned version will compare with generic strrchr implemented +by strlen-aligned + memrchr-generic. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 + + .../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++ + .../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++ + .../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++ + 7 files changed, 578 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 39550bea..fe863e1b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -9,6 +9,9 @@ sysdep_routines += \ + strchr-aligned \ + strchr-lsx \ + strchr-lasx \ ++ strrchr-aligned \ ++ strrchr-lsx \ ++ strrchr-lasx \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 39a14f1d..529e2369 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) + ) + ++ IFUNC_IMPL (i, name, strrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +new file mode 100644 +index 00000000..bbb34089 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strrchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +new file mode 100644 +index 00000000..a73deb78 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +@@ -0,0 +1,170 @@ ++/* Optimized strrchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRRCHR __strrchr_aligned ++#else ++# define STRRCHR strrchr ++#endif ++ ++LEAF(STRRCHR, 6) ++ slli.d t0, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ andi a1, a1, 0xff ++ ori a2, a2, 0x101 ++ li.d t3, -1 ++ bstrins.d a2, a2, 63, 32 ++ ++ sll.d t5, t3, t0 ++ slli.d a3, a2, 7 ++ orn t4, t2, t5 ++ mul.d a1, a1, a2 ++ ++ sub.d t0, t4, a2 ++ andn t1, a3, t4 ++ and t1, t0, t1 ++ beqz t1, L(find_tail) ++ ++ ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t4, a1 ++ srl.d t0, t3, t0 ++ ++ orn t2, t2, t0 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a0, 7 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ ++L(find_tail): ++ addi.d a4, a0, 8 ++ addi.d a0, a0, 8 ++L(loop_ascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_ascii) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t0, a3, t2 ++ and t1, t1, t0 ++ bnez t1, L(tail) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_nonascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ++ bnez t1, L(tail) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ beqz t1, L(loop_nonascii) ++ addi.d a0, a0, -8 ++ ++L(tail): ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t2, a1 ++ srl.d t0, t3, t0 ++ ++ ++ orn t2, t2, t0 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ bnez t1, L(count_pos) ++L(find_loop): ++ beq a0, a4, L(find_end) ++ ld.d t2, a0, -8 ++ ++ addi.d a0, a0, -8 ++ xor t2, t2, a1 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ beqz t1, L(find_loop) ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++L(count_pos): ++ ctz.d t0, t1 ++ addi.d a0, a0, 7 ++ ++ srli.d t0, t0, 3 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ xor t2, t4, a1 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a4, -1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +new file mode 100644 +index 00000000..5a6e2297 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +@@ -0,0 +1,176 @@ ++/* Optimized strrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lasx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ li.d t2, -1 ++ xvreplgr2vr.b xr4, a1 ++ xvmsknz.b xr2, xr0 ++ xvmsknz.b xr3, xr1 ++ ++ xvpickve.w xr5, xr2, 4 ++ xvpickve.w xr6, xr3, 4 ++ vilvl.h vr2, vr5, vr2 ++ vilvl.h vr3, vr6, vr3 ++ ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa2 ++ sra.d t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.d t1, fa1 ++ cto.d t0, t0 ++ ++ srl.d t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 63 ++ andn t1, t1, t3 ++ ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 64 ++L(loop): ++ xvld xr2, a0, 64 ++ xvld xr3, a0, 96 ++ addi.d a0, a0, 64 ++ ++ xvmin.bu xr5, xr2, xr3 ++ xvsetanyeqz.b fcc0, xr5 ++ bceqz fcc0, L(loop) ++ xvmsknz.b xr5, xr2 ++ ++ ++ xvmsknz.b xr6, xr3 ++ xvpickve.w xr7, xr5, 4 ++ xvpickve.w xr8, xr6, 4 ++ vilvl.h vr5, vr7, vr5 ++ ++ vilvl.h vr6, vr8, vr6 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ xvmsknz.b xr2, xr2 ++ ++ xvmsknz.b xr3, xr3 ++ xvpickve.w xr7, xr2, 4 ++ xvpickve.w xr8, xr3, 4 ++ vilvl.h vr2, vr7, vr2 ++ ++ vilvl.h vr3, vr8, vr3 ++ vilvl.w vr5, vr6, vr5 ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa5 ++ ++ ++ movfr2gr.d t1, fa2 ++ slli.d t3, t2, 1 ++ cto.d t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.d t0, t1 ++ addi.d a0, a0, 63 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ xvld xr2, a0, -64 ++ ++ xvld xr3, a0, -32 ++ addi.d a0, a0, -64 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ ++ ++ xvmax.bu xr5, xr2, xr3 ++ xvseteqz.v fcc0, xr5 ++ bcnez fcc0, L(find_loop) ++ xvmsknz.b xr0, xr2 ++ ++ xvmsknz.b xr1, xr3 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ addi.d a0, a0, 63 ++ ++ clz.d t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++ ++L(find_end): ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ movfr2gr.d t1, fa1 ++ addi.d a0, a2, 63 ++ srl.d t1, t1, a2 ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +new file mode 100644 +index 00000000..8f2fd22e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +@@ -0,0 +1,144 @@ ++/* Optimized strrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lsx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t2, -1 ++ vreplgr2vr.b vr4, a1 ++ vmsknz.b vr2, vr0 ++ vmsknz.b vr3, vr1 ++ ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ sra.w t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.s t1, fa1 ++ cto.w t0, t0 ++ ++ srl.w t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 31 ++ andn t1, t1, t3 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 32 ++L(loop): ++ vld vr2, a0, 32 ++ vld vr3, a0, 48 ++ addi.d a0, a0, 32 ++ ++ vmin.bu vr5, vr2, vr3 ++ vsetanyeqz.b fcc0, vr5 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr5, vr2 ++ ++ vmsknz.b vr6, vr3 ++ vilvl.h vr5, vr6, vr5 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa5 ++ ++ ++ movfr2gr.s t1, fa2 ++ slli.d t3, t2, 1 ++ cto.w t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.w t0, t1 ++ addi.d a0, a0, 31 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ vld vr2, a0, -32 ++ ++ vld vr3, a0, -16 ++ addi.d a0, a0, -32 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ ++ vmax.bu vr5, vr2, vr3 ++ vseteqz.v fcc0, vr5 ++ bcnez fcc0, L(find_loop) ++ vmsknz.b vr0, vr2 ++ ++ vmsknz.b vr1, vr3 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ addi.d a0, a0, 31 ++ ++ clz.w t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ movfr2gr.s t1, fa1 ++ addi.d a0, a2, 31 ++ srl.w t1, t1, a2 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c +new file mode 100644 +index 00000000..d9c9f660 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strrchr __redirect_strrchr ++# include <string.h> ++# undef strrchr ++ ++# define SYMBOL_NAME strrchr ++# include "ifunc-strrchr.h" ++ ++libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); ++weak_alias (strrchr, rindex) ++# ifdef SHARED ++__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); ++# endif ++ ++#endif +-- +2.33.0 +
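The baseline referred to in the commit message above — generic strrchr built from strlen plus memrchr — can be written out in a few lines of C. The sketch below mirrors glibc's generic string/strrchr.c and is included only to make the benchmark pairing (for example __strrchr_lasx versus strlen-lasx + memrchr-lasx) concrete; it is not LoongArch-specific code.

#define _GNU_SOURCE   /* memrchr is a GNU extension */
#include <string.h>

/* Last occurrence of C in S.  Scanning strlen (s) + 1 bytes backwards also
   covers the c == '\0' case, where a pointer to the terminator is returned.  */
char *
generic_strrchr (const char *s, int c)
{
  return (char *) memrchr (s, c, strlen (s) + 1);
}

The single-pass __strrchr_{aligned,lsx,lasx} implementations above avoid walking the string twice, which is essentially where the quoted 0%-50% runtime reduction comes from.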
View file
_service:tar_scm:LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
Added
@@ -0,0 +1,626 @@ +From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 8 Sep 2023 14:10:55 +0800 +Subject: PATCH 20/29 LoongArch: Add lasx/lsx support for + _dl_runtime_profile. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/bits/link.h | 24 ++- + sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++ + sysdeps/loongarch/dl-audit-check.h | 23 +++ + sysdeps/loongarch/dl-link.sym | 8 +- + sysdeps/loongarch/dl-machine.h | 11 +- + sysdeps/loongarch/dl-trampoline.S | 177 +---------------- + sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++ + 7 files changed, 331 insertions(+), 179 deletions(-) + create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h + create mode 100644 sysdeps/loongarch/dl-audit-check.h + +diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h +index 7fa61312..00f6f25f 100644 +--- a/sysdeps/loongarch/bits/link.h ++++ b/sysdeps/loongarch/bits/link.h +@@ -20,10 +20,26 @@ + #error "Never include <bits/link.h> directly; use <link.h> instead." + #endif + ++#ifndef __loongarch_soft_float ++typedef float La_loongarch_vr ++ __attribute__ ((__vector_size__ (16), __aligned__ (16))); ++typedef float La_loongarch_xr ++ __attribute__ ((__vector_size__ (32), __aligned__ (16))); ++ ++typedef union ++{ ++ double fpreg4; ++ La_loongarch_vr vr2; ++ La_loongarch_xr xr1; ++} La_loongarch_vector __attribute__ ((__aligned__ (16))); ++#endif ++ + typedef struct La_loongarch_regs + { + unsigned long int lr_reg8; /* a0 - a7 */ +- double lr_fpreg8; /* fa0 - fa7 */ ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lr_vec8; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ ++#endif + unsigned long int lr_ra; + unsigned long int lr_sp; + } La_loongarch_regs; +@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval + { + unsigned long int lrv_a0; + unsigned long int lrv_a1; +- double lrv_fa0; +- double lrv_fa1; ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lrv_vec0; ++ La_loongarch_vector lrv_vec1; ++#endif + } La_loongarch_retval; + + __BEGIN_DECLS +diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h +new file mode 100644 +index 00000000..15f1eb84 +--- /dev/null ++++ b/sysdeps/loongarch/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _LINK_H ++# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. 
*/ ++#define LAV_CURRENT 3 +diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h +new file mode 100644 +index 00000000..a139c939 +--- /dev/null ++++ b/sysdeps/loongarch/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym +index 868ab7c6..b534968e 100644 +--- a/sysdeps/loongarch/dl-link.sym ++++ b/sysdeps/loongarch/dl-link.sym +@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) + DL_SIZEOF_RV sizeof(struct La_loongarch_retval) + + DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) +-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) ++#endif + DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) + DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) + + DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) +-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) ++#endif +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 066bb233..8a2db9de 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + #if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lasx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lsx (void) attribute_hidden; + #endif + extern void _dl_runtime_resolve (void) attribute_hidden; + extern void _dl_runtime_profile (void) attribute_hidden; +@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + end in this function. 
*/ + if (profile != 0) + { +- gotplt0 = (ElfW(Addr)) &_dl_runtime_profile; ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx; ++ else if (SUPPORT_LSX) ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx; ++ else ++#endif ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index 8fd91469..bb449ecf 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -22,190 +22,21 @@ + #if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx ++#define _dl_runtime_profile _dl_runtime_profile_lasx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LASX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + + #define USE_LSX + #define _dl_runtime_resolve _dl_runtime_resolve_lsx ++#define _dl_runtime_profile _dl_runtime_profile_lsx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LSX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + #endif + + #include "dl-trampoline.h" +- +-#include "dl-link.h" +- +-ENTRY (_dl_runtime_profile) +- /* LoongArch we get called with: +- t0 linkr_map pointer +- t1 the scaled offset stored in t0, which can be used +- to calculate the offset of the current symbol in .rela.plt +- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function +- t3 dl resolver entry point, no use in this function +- +- Stack frame layout: +- sp, #96 La_loongarch_regs +- sp, #48 La_loongarch_retval +- sp, #40 frame size return from pltenter +- sp, #32 dl_profile_call saved a1 +- sp, #24 dl_profile_call saved a0 +- sp, #16 T1 +- sp, #0 ra, fp <- fp +- */ +- +-# define OFFSET_T1 16 +-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 +-# define OFFSET_RV OFFSET_FS + 8 +-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +- +-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) +- +- /* Save arguments to stack. */ +- ADDI sp, sp, -SF_SIZE +- REG_S ra, sp, 0 +- REG_S fp, sp, 8 +- +- or fp, sp, zero +- +- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- /* Update .got.plt and obtain runtime address of callee. 
*/ +- SLLI a1, t1, 1 +- or a0, t0, zero +- ADD a1, a1, t1 +- or a2, ra, zero /* return addr */ +- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ +- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- +- la t2, _dl_profile_fixup +- jirl ra, t2, 0 +- +- REG_L t3, fp, OFFSET_FS +- bge t3, zero, 1f +- +- /* Save the return. */ +- or t4, v0, zero +- +- /* Restore arguments from stack. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- REG_L ra, fp, 0 +- REG_L fp, fp, SZREG +- +- ADDI sp, sp, SF_SIZE +- jirl zero, t4, 0 +- +-1: +- /* The new frame size is in t3. */ +- SUB sp, fp, t3 +- BSTRINS sp, zero, 3, 0 +- +- REG_S a0, fp, OFFSET_T1 +- +- or a0, sp, zero +- ADDI a1, fp, SF_SIZE +- or a2, t3, zero +- la t5, memcpy +- jirl ra, t5, 0 +- +- REG_L t6, fp, OFFSET_T1 +- +- /* Call the function. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- jirl ra, t6, 0 +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 +- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* Setup call to pltexit. 
*/ +- REG_L a0, fp, OFFSET_SAVED_CALL_A0 +- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- ADDI a2, fp, OFFSET_RG +- ADDI a3, fp, OFFSET_RV +- la t7, _dl_audit_pltexit +- jirl ra, t7, 0 +- +- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 +- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 +- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* RA from within La_loongarch_reg. */ +- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA +- or sp, fp, zero +- ADDI sp, sp, SF_SIZE +- REG_S fp, fp, SZREG +- +- jirl zero, ra, 0 +- +-END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h +index 99fcacab..e298439d 100644 +--- a/sysdeps/loongarch/dl-trampoline.h ++++ b/sysdeps/loongarch/dl-trampoline.h +@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) + /* Invoke the callee. */ + jirl zero, t1, 0 + END (_dl_runtime_resolve) ++ ++#include "dl-link.h" ++ ++ENTRY (_dl_runtime_profile) ++ /* LoongArch we get called with: ++ t0 linkr_map pointer ++ t1 the scaled offset stored in t0, which can be used ++ to calculate the offset of the current symbol in .rela.plt ++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function ++ t3 dl resolver entry point, no use in this function ++ ++ Stack frame layout: ++ sp, #208 La_loongarch_regs ++ sp, #128 La_loongarch_retval // align: 16 ++ sp, #112 frame size return from pltenter ++ sp, #80 dl_profile_call saved vec1 ++ sp, #48 dl_profile_call saved vec0 // align: 16 ++ sp, #32 dl_profile_call saved a1 ++ sp, #24 dl_profile_call saved a0 ++ sp, #16 T1 ++ sp, #0 ra, fp <- fp ++ */ ++ ++# define OFFSET_T1 16 ++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 ++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 ++# define OFFSET_RV OFFSET_FS + 8 + 8 ++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV ++ ++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) ++ ++ /* Save arguments to stack. 
*/ ++ ADDI sp, sp, -SF_SIZE ++ REG_S ra, sp, 0 ++ REG_S fp, sp, 8 ++ ++ or fp, sp, zero ++ ++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ /* Update .got.plt and obtain runtime address of callee. */ ++ SLLI a1, t1, 1 ++ or a0, t0, zero ++ ADD a1, a1, t1 ++ or a2, ra, zero /* return addr */ ++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ ++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ++ la t2, _dl_profile_fixup ++ jirl ra, t2, 0 ++ ++ REG_L t3, fp, OFFSET_FS ++ bge t3, zero, 1f ++ ++ /* Save the return. */ ++ or t4, v0, zero ++ ++ /* Restore arguments from stack. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ REG_L ra, fp, 0 ++ REG_L fp, fp, SZREG ++ ++ ADDI sp, sp, SF_SIZE ++ jirl zero, t4, 0 ++ ++1: ++ /* The new frame size is in t3. */ ++ SUB sp, fp, t3 ++ BSTRINS sp, zero, 3, 0 ++ ++ REG_S a0, fp, OFFSET_T1 ++ ++ or a0, sp, zero ++ ADDI a1, fp, SF_SIZE ++ or a2, t3, zero ++ la t5, memcpy ++ jirl ra, t5, 0 ++ ++ REG_L t6, fp, OFFSET_T1 ++ ++ /* Call the function. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ jirl ra, t6, 0 ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* Setup call to pltexit. */ ++ REG_L a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ADDI a2, fp, OFFSET_RG ++ ADDI a3, fp, OFFSET_RV ++ la t7, _dl_audit_pltexit ++ jirl ra, t7, 0 ++ ++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 ++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* RA from within La_loongarch_reg. 
*/ ++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA ++ or sp, fp, zero ++ ADDI sp, sp, SF_SIZE ++ REG_S fp, fp, SZREG ++ ++ jirl zero, ra, 0 ++ ++END (_dl_runtime_profile) +-- +2.33.0 +
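The widened La_loongarch_regs / La_loongarch_retval layouts above exist for the sake of LD_AUDIT modules, and the LAV_CURRENT bump to 3 together with the new _dl_audit_check_version means a module built against the old pre-vector layout is rejected at load time instead of reading a mismatched stack frame. The sketch below is a minimal, generic audit module following rtld-audit(7), shown only to illustrate that version handshake; it is not part of the patch.

#define _GNU_SOURCE
#include <link.h>
#include <stdint.h>
#include <stdio.h>

/* The dynamic linker calls la_version first; returning LAV_CURRENT accepts
   the interface version.  After this patch LoongArch's LAV_CURRENT is 3,
   and _dl_audit_check_version only admits an exact match.  */
unsigned int
la_version (unsigned int version)
{
  fprintf (stderr, "rtld-audit interface offered: %u\n", version);
  return LAV_CURRENT;
}

unsigned int
la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
{
  fprintf (stderr, "object loaded: %s\n",
           map->l_name[0] ? map->l_name : "(main program)");
  return LA_FLG_BINDTO | LA_FLG_BINDFROM;
}

Built with "gcc -shared -fPIC -o audit.so audit.c" and run with LD_AUDIT=./audit.so, PLT bindings in the audited objects go through the _dl_runtime_profile{,_lsx,_lasx} entry points selected in elf_machine_runtime_setup above.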
View file
_service:tar_scm:LoongArch-Add-minuimum-binutils-required-version.patch
Added
@@ -0,0 +1,102 @@ +From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:43 +0800 +Subject: PATCH 02/29 LoongArch: Add minuimum binutils required version + +LoongArch glibc can add some LASX/LSX vector instructions codes, +change the required minimum binutils version to 2.41 which could +support vector instructions. HAVE_LOONGARCH_VEC_ASM is removed +accordingly. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + config.h.in | 5 ----- + sysdeps/loongarch/configure | 5 ++--- + sysdeps/loongarch/configure.ac | 4 ++-- + sysdeps/loongarch/dl-machine.h | 4 ++-- + sysdeps/loongarch/dl-trampoline.S | 2 +- + 5 files changed, 7 insertions(+), 13 deletions(-) + +diff --git a/config.h.in b/config.h.in +index 0dedc124..44a34072 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -141,11 +141,6 @@ + /* LOONGARCH floating-point ABI for ld.so. */ + #undef LOONGARCH_ABI_FRLEN + +-/* Assembler support LoongArch LASX/LSX vector instructions. +- This macro becomes obsolete when glibc increased the minimum +- required version of GNU 'binutils' to 2.41 or later. */ +-#define HAVE_LOONGARCH_VEC_ASM 0 +- + /* Linux specific: minimum supported kernel version. */ + #undef __LINUX_KERNEL_VERSION + +diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure +index 5843c7cf..395ddc92 100644 +--- a/sysdeps/loongarch/configure ++++ b/sysdeps/loongarch/configure +@@ -128,8 +128,7 @@ rm -f conftest* + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5 + printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; } +-if test $libc_cv_loongarch_vec_asm = yes; then +- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h +- ++if test $libc_cv_loongarch_vec_asm = no; then ++ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 + fi + +diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac +index ba89d834..989287c6 100644 +--- a/sysdeps/loongarch/configure.ac ++++ b/sysdeps/loongarch/configure.ac +@@ -74,6 +74,6 @@ else + libc_cv_loongarch_vec_asm=no + fi + rm -f conftest*) +-if test $libc_cv_loongarch_vec_asm = yes; then +- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM) ++if test $libc_cv_loongarch_vec_asm = no; then ++ AC_MSG_ERROR(binutils version is too old, use 2.41 or newer version) + fi +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 51ce9af8..066bb233 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + /* If using PLTs, fill in the first two entries of .got.plt. */ + if (l->l_infoDT_JMPREL) + { +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; + #endif +@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. 
*/ +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx; + else if (SUPPORT_LSX) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index f6ba5e44..8fd91469 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -19,7 +19,7 @@ + #include <sysdep.h> + #include <sys/asm.h> + +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx + #include "dl-trampoline.h" +-- +2.33.0 +
View file
_service:tar_scm:LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
Added
@@ -0,0 +1,277 @@ +From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:40 +0800 +Subject: PATCH 19/29 LoongArch: Change loongarch to LoongArch in comments + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +- + 24 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +index 299dd49c..7eb34395 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy_aligned implementation using basic Loongarch instructions. ++/* Optimized memcpy_aligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +index 4aae5bf8..ae148df5 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy implementation using Loongarch LASX instructions. ++/* Optimized memcpy implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +index 6ebbe7a2..feb2bb0e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy implementation using Loongarch LSX instructions. ++/* Optimized memcpy implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +index 8e60a22d..31019b13 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -1,4 +1,4 @@ +-/* Optimized unaligned memcpy implementation using basic Loongarch instructions. ++/* Optimized unaligned memcpy implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +index 5354f383..a02114c0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove_aligned implementation using basic Loongarch instructions. ++/* Optimized memmove_aligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +index ff68e7a2..95d8ee7b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove implementation using Loongarch LASX instructions. ++/* Optimized memmove implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +index 9e1502a7..8a936770 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove implementation using Loongarch LSX instructions. ++/* Optimized memmove implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +index 90a64b6b..3284ce25 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove_unaligned implementation using basic Loongarch instructions. ++/* Optimized memmove_unaligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +index 5fb01806..62020054 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strchr implementation using basic Loongarch instructions. ++/* Optimized strchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +index 254402da..4d3cc588 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchr implementation using loongarch LASX SIMD instructions. 
++/* Optimized strchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +index dae98b0a..8b78c35c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using loongarch LSX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +index 1c01a023..20856a06 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using basic Loongarch instructions. ++/* Optimized strchrnul implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +index d45495e4..4753d4ce 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. ++/* Optimized strchrnul implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +index 07d793ae..671e740c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. ++/* Optimized strchrnul implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +index f5f4f336..ba1f9667 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strcmp implementation using basic Loongarch instructions. ++/* Optimized strcmp implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +index 2e177a38..091c8c9e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strcmp implementation using Loongarch LSX instructions. ++/* Optimized strcmp implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +index e9e1d2fc..ed0548e4 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using basic Loongarch instructions. ++/* Optimized strlen implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +index 258c47ce..91342f34 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using loongarch LASX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +index b194355e..b09c12e0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using Loongarch LSX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +index e2687fa7..f63de872 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strncmp implementation using basic Loongarch instructions. ++/* Optimized strncmp implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +index 0b4eee2a..83cb801d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strncmp implementation using Loongarch LSX instructions. ++/* Optimized strncmp implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +index b900430a..a8296a1b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using basic Loongarch instructions. ++/* Optimized strnlen implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +index 2c03d3d9..aa6c812d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using loongarch LASX instructions ++/* Optimized strnlen implementation using LoongArch LASX instructions + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +index b769a895..d0febe3e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using loongarch LSX instructions ++/* Optimized strnlen implementation using LoongArch LSX instructions + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +-- +2.33.0 +
_service:tar_scm:LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
Added
@@ -0,0 +1,67 @@ +From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:35:01 +0800 +Subject: PATCH 24/29 LoongArch: Change to put magic number to .rodata + section + +Change to put magic number to .rodata section in memmove-lsx, and use +pcalau12i and %pc_lo12 with vld to get the data. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + .../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +index 8a936770..5eb819ef 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -209,13 +209,10 @@ L(al_less_16): + nop + + +-L(magic_num): +- .dword 0x0706050403020100 +- .dword 0x0f0e0d0c0b0a0908 + L(unaligned): +- pcaddi t2, -4 ++ pcalau12i t2, %pc_hi20(L(INDEX)) + bstrins.d a1, zero, 3, 0 +- vld vr8, t2, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) + vld vr0, a1, 0 + + vld vr1, a1, 16 +@@ -413,13 +410,10 @@ L(back_al_less_16): + vst vr1, a0, 0 + jr ra + +-L(magic_num_2): +- .dword 0x0706050403020100 +- .dword 0x0f0e0d0c0b0a0908 + L(back_unaligned): +- pcaddi t2, -4 ++ pcalau12i t2, %pc_hi20(L(INDEX)) + bstrins.d a4, zero, 3, 0 +- vld vr8, t2, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) + vld vr0, a4, 0 + + vld vr1, a4, -16 +@@ -529,6 +523,12 @@ L(back_un_less_16): + jr ra + END(MEMMOVE_NAME) + ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ + libc_hidden_builtin_def (MEMCPY_NAME) + libc_hidden_builtin_def (MEMMOVE_NAME) + #endif +-- +2.33.0 +
_service:tar_scm:LoongArch-Micro-optimize-LD_PCREL.patch
Added
@@ -0,0 +1,44 @@ +From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 27 Aug 2023 00:36:51 +0800 +Subject: PATCH 13/29 LoongArch: Micro-optimize LD_PCREL + +We are requiring Binutils >= 2.41, so explicit relocation syntax is +always supported by the assembler. Use it to reduce one instruction. + +Signed-off-by: Xi Ruoyao <xry111@xry111.site> +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +index b25e353b..d6c78687 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h ++++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +@@ -19,17 +19,15 @@ + #ifndef POINTER_GUARD_H + #define POINTER_GUARD_H + +-/* Load a got-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a got-relative EXPR into register G. */ + #define LD_GLOBAL(G, EXPR) \ + la.global G, EXPR; \ + REG_L G, G, 0; + +-/* Load a pc-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a pc-relative EXPR into register G. */ + #define LD_PCREL(G, EXPR) \ +- la.pcrel G, EXPR; \ +- REG_L G, G, 0; ++ pcalau12i G, %pc_hi20(EXPR); \ ++ REG_L G, G, %pc_lo12(EXPR); + + #if (IS_IN (rtld) \ + || (!defined SHARED && (IS_IN (libc) \ +-- +2.33.0 +
_service:tar_scm:LoongArch-Redefine-macro-LEAF-ENTRY.patch
Added
@@ -0,0 +1,65 @@ +From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:42 +0800 +Subject: PATCH 01/29 LoongArch: Redefine macro LEAF/ENTRY. + +The following usage of macro LEAF/ENTRY are all feasible: +1. LEAF(fcn) -- the align value of fcn is .align 3(default value) +2. LEAF(fcn, 6) -- the align value of fcn is .align 6 + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++---------- + 1 file changed, 26 insertions(+), 10 deletions(-) + +diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h +index d1a279b8..c5eb8afa 100644 +--- a/sysdeps/loongarch/sys/asm.h ++++ b/sysdeps/loongarch/sys/asm.h +@@ -39,16 +39,32 @@ + #define FREG_L fld.d + #define FREG_S fst.d + +-/* Declare leaf routine. */ +-#define LEAF(symbol) \ +- .text; \ +- .globl symbol; \ +- .align 3; \ +- cfi_startproc; \ +- .type symbol, @function; \ +- symbol: +- +-#define ENTRY(symbol) LEAF (symbol) ++/* Declare leaf routine. ++ The usage of macro LEAF/ENTRY is as follows: ++ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value) ++ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6 ++*/ ++#define LEAF_IMPL(symbol, aln, ...) \ ++ .text; \ ++ .globl symbol; \ ++ .align aln; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++ ++#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3) ++#define ENTRY(...) LEAF(__VA_ARGS__) ++ ++#define LEAF_NO_ALIGN(symbol) \ ++ .text; \ ++ .globl symbol; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol) ++ + + /* Mark end of function. */ + #undef END +-- +2.33.0 +
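The default alignment in this patch comes from argument-list padding: LEAF(...) forwards the caller's arguments and appends 3, while LEAF_IMPL consumes only its first two parameters, so an unneeded trailing default is swallowed by its "...". A minimal C sketch of the same trick, with hypothetical ALIGN/ALIGN_IMPL names; like the glibc macro itself, it relies on the compiler accepting an empty variadic argument (GCC does):

    #include <stdio.h>

    /* Same shape as LEAF/LEAF_IMPL above: caller arguments first,
       the default alignment appended last, only the first two used.  */
    #define ALIGN_IMPL(name, aln, ...)  printf ("%s -> .align %d\n", #name, aln)
    #define ALIGN(...)                  ALIGN_IMPL (__VA_ARGS__, 3)

    int
    main (void)
    {
      ALIGN (fcn);     /* expands to ALIGN_IMPL (fcn, 3)    -> ".align 3" */
      ALIGN (fcn, 6);  /* expands to ALIGN_IMPL (fcn, 6, 3) -> ".align 6" */
      return 0;
    }

This is why both spellings used by the later string patches, plain LEAF(symbol) and LEAF(STRLEN, 6), assemble with the intended alignment.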
_service:tar_scm:LoongArch-Remove-support-code-for-old-linker-in-star.patch
Added
@@ -0,0 +1,56 @@ +From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 27 Aug 2023 00:36:50 +0800 +Subject: PATCH 12/29 LoongArch: Remove support code for old linker in + start.S + +We are requiring Binutils >= 2.41, so la.pcrel always works here. + +Signed-off-by: Xi Ruoyao <xry111@xry111.site> +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/start.S | 19 +++---------------- + 1 file changed, 3 insertions(+), 16 deletions(-) + +diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S +index e9d82033..bf6bfc9e 100644 +--- a/sysdeps/loongarch/start.S ++++ b/sysdeps/loongarch/start.S +@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT) + cfi_undefined (1) + or a5, a0, zero /* rtld_fini */ + +-#if ENABLE_STATIC_PIE +-/* For static PIE, the GOT cannot be used in _start because the GOT entries are +- offsets instead of real addresses before __libc_start_main. +- __libc_start_main and/or main may be not local, so we rely on the linker to +- produce PLT entries for them. GNU ld >= 2.40 supports this. */ +-# define LA la.pcrel +-#else +-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local +- function correctly. We deem these old linkers failing to support static PIE +- and load the addresses from GOT. */ +-# define LA la.got +-#endif +- +- LA a0, t0, main ++ la.pcrel a0, t0, main + REG_L a1, sp, 0 + ADDI a2, sp, SZREG + +@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT) + move a4, zero /* used to be fini */ + or a6, sp, zero /* stack_end */ + +- LA ra, t0, __libc_start_main ++ la.pcrel ra, t0, __libc_start_main + jirl ra, ra, 0 + +- LA ra, t0, abort ++ la.pcrel ra, t0, abort + jirl ra, ra, 0 + END (ENTRY_POINT) +-- +2.33.0 +
_service:tar_scm:LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
Added
@@ -0,0 +1,28 @@ +From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Thu, 14 Sep 2023 19:48:24 +0800 +Subject: PATCH 21/29 LoongArch: Replace deprecated $v0 with $a0 to eliminate + 'as' Warnings. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/dl-machine.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 8a2db9de..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void) + or $a0, $sp, $zero \n\ + bl _dl_start \n\ + # Stash user entry point in s0. \n\ +- or $s0, $v0, $zero \n\ ++ or $s0, $a0, $zero \n\ + # Load the original argument count. \n\ + ld.d $a1, $sp, 0 \n\ + # Call _dl_init (struct link_map *main_map, int argc, \ +-- +2.33.0 +
_service:tar_scm:LoongArch-Unify-Register-Names.patch
Added
@@ -0,0 +1,81 @@ +From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 20 Oct 2023 09:20:02 +0800 +Subject: PATCH 27/29 LoongArch: Unify Register Names. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/__longjmp.S | 20 ++++++++++---------- + sysdeps/loongarch/setjmp.S | 18 +++++++++--------- + 2 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S +index cbde1946..e87ce311 100644 +--- a/sysdeps/loongarch/__longjmp.S ++++ b/sysdeps/loongarch/__longjmp.S +@@ -43,18 +43,18 @@ ENTRY (__longjmp) + REG_L s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_L $f24, a0, 13*SZREG + 0*SZFREG +- FREG_L $f25, a0, 13*SZREG + 1*SZFREG +- FREG_L $f26, a0, 13*SZREG + 2*SZFREG +- FREG_L $f27, a0, 13*SZREG + 3*SZFREG +- FREG_L $f28, a0, 13*SZREG + 4*SZFREG +- FREG_L $f29, a0, 13*SZREG + 5*SZFREG +- FREG_L $f30, a0, 13*SZREG + 6*SZFREG +- FREG_L $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_L fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_L fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_L fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_L fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_L fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_L fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_L fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_L fs7, a0, 13*SZREG + 7*SZFREG + #endif + +- sltui a0,a1,1 ++ sltui a0, a1, 1 + ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + + END (__longjmp) +diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S +index 6c7065cd..b6e4f727 100644 +--- a/sysdeps/loongarch/setjmp.S ++++ b/sysdeps/loongarch/setjmp.S +@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp) + REG_S s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_S $f24, a0, 13*SZREG + 0*SZFREG +- FREG_S $f25, a0, 13*SZREG + 1*SZFREG +- FREG_S $f26, a0, 13*SZREG + 2*SZFREG +- FREG_S $f27, a0, 13*SZREG + 3*SZFREG +- FREG_S $f28, a0, 13*SZREG + 4*SZFREG +- FREG_S $f29, a0, 13*SZREG + 5*SZFREG +- FREG_S $f30, a0, 13*SZREG + 6*SZFREG +- FREG_S $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_S fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_S fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_S fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_S fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_S fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_S fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_S fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_S fs7, a0, 13*SZREG + 7*SZFREG + #endif + + #if !IS_IN (libc) && IS_IN(rtld) + li.w v0, 0 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + #else + b __sigjmp_save + #endif +-- +2.33.0 +
_service:tar_scm:LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
Added
@@ -0,0 +1,24 @@ +From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Mon, 23 Oct 2023 15:53:38 +0800 +Subject: PATCH 28/29 LoongArch: Update hwcap.h to sync with LoongArch + kernel. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +index 5104b69c..7acec23d 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +@@ -35,3 +35,4 @@ + #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) + #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) + #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#define HWCAP_LOONGARCH_PTW (1 << 13) +-- +2.33.0 +
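The SUPPORT_LSX / SUPPORT_LASX tests used by the ifunc selectors in the other patches are simply these HWCAP bits checked against GLRO(dl_hwcap); user code can make the equivalent runtime query with getauxval. A minimal sketch, assuming a LoongArch glibc whose <sys/auxv.h> exposes the HWCAP_LOONGARCH_* constants from this bits/hwcap.h:

    #include <stdio.h>
    #include <sys/auxv.h>   /* getauxval, AT_HWCAP, HWCAP_LOONGARCH_* */

    int
    main (void)
    {
      unsigned long hwcap = getauxval (AT_HWCAP);

      printf ("LSX:  %d\n", (hwcap & HWCAP_LOONGARCH_LSX) != 0);
      printf ("LASX: %d\n", (hwcap & HWCAP_LOONGARCH_LASX) != 0);
    #ifdef HWCAP_LOONGARCH_PTW
      printf ("PTW:  %d\n", (hwcap & HWCAP_LOONGARCH_PTW) != 0);
    #endif
      return 0;
    }

The new PTW bit is only reported here; the ifunc selectors in these patches key off LSX, LASX and UAL.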
_service:tar_scm:LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
Added
@@ -0,0 +1,30 @@ +From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Mon, 14 Aug 2023 15:34:08 +0800 +Subject: PATCH 05/29 LoongArch: elf: Add new LoongArch reloc types 109 into + elf.h + +These reloc types are generated by GNU assembler >= 2.41 for relaxation +support. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + elf/elf.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/elf/elf.h b/elf/elf.h +index d623bdeb..9c51073f 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -4213,6 +4213,7 @@ enum + #define R_LARCH_SUB6 106 + #define R_LARCH_ADD_ULEB128 107 + #define R_LARCH_SUB_ULEB128 108 ++#define R_LARCH_64_PCREL 109 + + /* ARC specific declarations. */ + +-- +2.33.0 +
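The R_LARCH_* values are plain macros, so whether an installed set of headers already carries this change can be checked directly from <elf.h>; a small sketch (it only compiles against a glibc whose elf.h includes this patch and its 101-108 predecessor):

    #include <elf.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Relocation numbers emitted by GNU assembler >= 2.41 for relaxation.  */
      printf ("R_LARCH_ADD_ULEB128 = %d\n", R_LARCH_ADD_ULEB128);  /* 107 */
      printf ("R_LARCH_SUB_ULEB128 = %d\n", R_LARCH_SUB_ULEB128);  /* 108 */
      printf ("R_LARCH_64_PCREL    = %d\n", R_LARCH_64_PCREL);     /* 109 */
      return 0;
    }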
_service:tar_scm:Loongarch-Add-ifunc-support-and-add-different-versio.patch
Added
@@ -0,0 +1,528 @@ +From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:44 +0800 +Subject: PATCH 03/29 Loongarch: Add ifunc support and add different versions + of strlen + +strlen-lasx is implemeted by LASX simd instructions(256bit) +strlen-lsx is implemeted by LSX simd instructions(128bit) +strlen-align is implemented by LA basic instructions and never use unaligned memory acess + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++ + .../lp64/multiarch/ifunc-impl-list.c | 41 +++++++ + .../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++ + .../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++ + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++ + sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++ + sysdeps/loongarch/sys/regdef.h | 57 ++++++++++ + .../unix/sysv/linux/loongarch/cpu-features.h | 2 + + 9 files changed, 418 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +new file mode 100644 +index 00000000..76c506c9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -0,0 +1,7 @@ ++ifeq ($(subdir),string) ++sysdep_routines += \ ++ strlen-aligned \ ++ strlen-lsx \ ++ strlen-lasx \ ++# sysdep_routines ++endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +new file mode 100644 +index 00000000..1a2a576f +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -0,0 +1,41 @@ ++/* Enumerate available IFUNC implementations of a function LoongArch64 version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++#include <assert.h> ++#include <string.h> ++#include <wchar.h> ++#include <ldsodefs.h> ++#include <ifunc-impl-list.h> ++#include <stdio.h> ++ ++size_t ++__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ++ size_t max) ++{ ++ ++ size_t i = max; ++ ++ IFUNC_IMPL (i, name, strlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) ++ ) ++ return i; ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +new file mode 100644 +index 00000000..6258bb76 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +@@ -0,0 +1,40 @@ ++/* Common definition for strlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +new file mode 100644 +index 00000000..e9e1d2fc +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -0,0 +1,100 @@ ++/* Optimized strlen implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRLEN __strlen_aligned ++#else ++# define STRLEN strlen ++#endif ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ li.w t0, -1 ++ ++ ld.d t2, a0, 0 ++ andi t1, a1, 0x7 ++ ori a2, a2, 0x101 ++ slli.d t1, t1, 3 ++ ++ bstrins.d a2, a2, 63, 32 ++ sll.d t1, t0, t1 ++ slli.d t3, a2, 7 ++ nor a3, zero, t3 ++ ++ orn t2, t2, t1 ++ sub.d t0, t2, a2 ++ nor t1, t2, a3 ++ and t0, t0, t1 ++ ++ ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++L(loop_16_7bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_16_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ nor t0, t2, a3 ++ and t0, t1, t0 ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_16_8bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ ++ bnez t0, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ beqz t0, L(loop_16_8bit) ++ addi.d a0, a0, -8 ++ ++L(count_pos): ++ ctz.d t1, t0 ++ sub.d a0, a0, a1 ++ srli.d t1, t1, 3 ++ add.d a0, a0, t1 ++ ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +new file mode 100644 +index 00000000..258c47ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -0,0 +1,63 @@ ++/* Optimized strlen implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lasx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ li.d t1, -1 ++ xvld xr0, a0, 0 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 # sign extend ++ ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ ++ ++ xvmsknz.b xr0, xr0 ++ sub.d a0, a0, a1 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +new file mode 100644 +index 00000000..b194355e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized strlen implementation using Loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lsx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t1, -1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ ++ jr ra ++ nop ++ nop ++ nop ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vmin.bu vr2, vr0, vr1 ++ ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ ++ vilvl.h vr0, vr1, vr0 ++ sub.d a0, a0, a1 ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c +new file mode 100644 +index 00000000..381c2daa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of strlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strlen __redirect_strlen ++# include <string.h> ++# undef strlen ++ ++# define SYMBOL_NAME strlen ++# include "ifunc-strlen.h" ++ ++libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h +index 5100f36d..524d2e32 100644 +--- a/sysdeps/loongarch/sys/regdef.h ++++ b/sysdeps/loongarch/sys/regdef.h +@@ -89,6 +89,14 @@ + #define fs5 $f29 + #define fs6 $f30 + #define fs7 $f31 ++#define fcc0 $fcc0 ++#define fcc1 $fcc1 ++#define fcc2 $fcc2 ++#define fcc3 $fcc3 ++#define fcc4 $fcc4 ++#define fcc5 $fcc5 ++#define fcc6 $fcc6 ++#define fcc7 $fcc7 + + #define vr0 $vr0 + #define vr1 $vr1 +@@ -98,6 +106,30 @@ + #define vr5 $vr5 + #define vr6 $vr6 + #define vr7 $vr7 ++#define vr8 $vr8 ++#define vr9 $vr9 ++#define vr10 $vr10 ++#define vr11 $vr11 ++#define vr12 $vr12 ++#define vr13 $vr13 ++#define vr14 $vr14 ++#define vr15 $vr15 ++#define vr16 $vr16 ++#define vr17 $vr17 ++#define vr18 $vr18 ++#define vr19 $vr19 ++#define vr20 $vr20 ++#define vr21 $vr21 ++#define vr22 $vr22 ++#define vr23 $vr23 ++#define vr24 $vr24 ++#define vr25 $vr25 ++#define vr26 $vr26 ++#define vr27 $vr27 ++#define vr28 $vr28 ++#define vr29 $vr29 ++#define vr30 $vr30 ++#define vr31 $vr31 + + #define xr0 $xr0 + #define xr1 $xr1 +@@ -107,5 +139,30 @@ + #define xr5 $xr5 + #define xr6 $xr6 + #define xr7 $xr7 ++#define xr7 $xr7 ++#define xr8 $xr8 ++#define xr9 $xr9 ++#define xr10 $xr10 ++#define xr11 $xr11 ++#define xr12 $xr12 ++#define xr13 $xr13 ++#define xr14 $xr14 ++#define xr15 $xr15 ++#define xr16 $xr16 ++#define xr17 $xr17 ++#define xr18 $xr18 ++#define xr19 $xr19 ++#define xr20 $xr20 ++#define xr21 $xr21 ++#define xr22 $xr22 ++#define xr23 $xr23 ++#define xr24 $xr24 ++#define xr25 $xr25 ++#define xr26 $xr26 ++#define xr27 $xr27 ++#define xr28 $xr28 ++#define xr29 $xr29 ++#define xr30 $xr30 ++#define xr31 $xr31 + + #endif /* _SYS_REGDEF_H */ +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index e371e13b..d1a280a5 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -25,5 +25,7 @@ + #define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) + #define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + ++#define INIT_ARCH() ++ + #endif /* _CPU_FEATURES_LOONGARCH64_H */ + +-- +2.33.0 +
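The glue above (ifunc-impl-list.c, the IFUNC_SELECTOR in ifunc-strlen.h, libc_ifunc_redirected) is glibc-internal, but the mechanism underneath is the ordinary STT_GNU_IFUNC resolver that GCC exposes as __attribute__ ((ifunc)). A standalone sketch of the same select-once-at-startup pattern, with hypothetical my_strlen* names, a trivial stand-in for the hwcap test, and the assumption of a toolchain and target with ifunc support (which these patches require in any case):

    #include <stdio.h>
    #include <string.h>

    typedef size_t strlen_fn (const char *);

    static size_t
    my_strlen_generic (const char *s)
    {
      size_t n = 0;
      while (s[n] != '\0')
        n++;
      return n;
    }

    /* Placeholder for an LSX/LASX-style optimized implementation.  */
    static size_t
    my_strlen_fast (const char *s)
    {
      return my_strlen_generic (s);
    }

    /* Resolver: runs once, at relocation time, like IFUNC_SELECTOR above.
       A real selector would test hwcap bits here.  */
    static strlen_fn *
    my_strlen_resolver (void)
    {
      int have_simd = 0;            /* stand-in for SUPPORT_LSX/SUPPORT_LASX */
      return have_simd ? my_strlen_fast : my_strlen_generic;
    }

    size_t my_strlen (const char *) __attribute__ ((ifunc ("my_strlen_resolver")));

    int
    main (void)
    {
      printf ("%zu\n", my_strlen ("LoongArch"));
      return 0;
    }

This bind-once behavior is what lets a single libc.so.6 ship aligned, LSX and LASX variants of each routine and pick among them per machine without per-call overhead.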
_service:tar_scm:Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
Added
@@ -0,0 +1,2570 @@ +From 9c522272146423c1ef9fb9e071737a8ad26e844e Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 15 Aug 2023 09:11:53 +0800 +Subject: PATCH 07/29 Loongarch: Add ifunc support for memcpy{aligned, + unaligned, lsx, lasx} and memmove{aligned, unaligned, lsx, lasx} + +These implementations improve the time to copy data in the glibc +microbenchmark as below: +memcpy-lasx reduces the runtime about 8%-76% +memcpy-lsx reduces the runtime about 8%-72% +memcpy-unaligned reduces the runtime of unaligned data copying up to 40% +memcpy-aligned reduece the runtime of unaligned data copying up to 25% +memmove-lasx reduces the runtime about 20%-73% +memmove-lsx reduces the runtime about 50% +memmove-unaligned reduces the runtime of unaligned data moving up to 40% +memmove-aligned reduces the runtime of unaligned data moving up to 25% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 5 + + .../lp64/multiarch/ifunc-impl-list.c | 19 + + sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h | 45 + + .../loongarch/lp64/multiarch/memcpy-aligned.S | 783 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memcpy-lasx.S | 20 + + sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 20 + + .../lp64/multiarch/memcpy-unaligned.S | 247 ++++++ + sysdeps/loongarch/lp64/multiarch/memcpy.c | 37 + + .../lp64/multiarch/memmove-aligned.S | 20 + + .../loongarch/lp64/multiarch/memmove-lasx.S | 287 +++++++ + .../loongarch/lp64/multiarch/memmove-lsx.S | 534 ++++++++++++ + .../lp64/multiarch/memmove-unaligned.S | 380 +++++++++ + sysdeps/loongarch/lp64/multiarch/memmove.c | 38 + + 13 files changed, 2435 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 110a8c5c..afa51041 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -9,5 +9,10 @@ sysdep_routines += \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-unaligned \ ++ memmove-lsx \ ++ memmove-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index c7164b45..25eb96b0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -53,5 +53,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) + ) ++ ++ IFUNC_IMPL (i, name, memcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, 
memcpy, SUPPORT_LSX, __memcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memmove, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx) ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +new file mode 100644 +index 00000000..3be67da6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +@@ -0,0 +1,45 @@ ++/* Common definition for ifunc selection implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (unaligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ if (SUPPORT_UAL) ++ return OPTIMIZE (unaligned); ++ else ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +new file mode 100644 +index 00000000..299dd49c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -0,0 +1,783 @@ ++/* Optimized memcpy_aligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCPY_NAME __memcpy_aligned ++# define MEMMOVE_NAME __memmove_aligned ++#else ++# define MEMCPY_NAME memcpy ++# define MEMMOVE_NAME memmove ++#endif ++ ++#define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++#define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 6) ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++END(MEMMOVE_NAME) ++ ++LEAF_NO_ALIGN(MEMCPY_NAME) ++ srai.d a3, a2, 4 ++ beqz a3, L(short_data) ++ ++ move a4, a0 ++ andi a5, a0, 0x7 ++ andi a6, a1, 0x7 ++ li.d t8, 8 ++ beqz a5, L(check_align) ++ ++ sub.d t2, t8, a5 ++ sub.d a2, a2, t2 ++ pcaddi t1, 20 ++ slli.d t3, t2, 3 ++ ++ add.d a1, a1, t2 ++ sub.d t1, t1, t3 ++ add.d a4, a4, t2 ++ jr t1 ++ ++L(al7): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(al6): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(al5): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(al4): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(al3): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(al2): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(al1): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(check_align): ++ bne a5, a6, L(unalign) ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ andi a3, a2, 0x3f ++ ++ beq a3, a2, L(al_less_64bytes) ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a5, a1, t0 ++ ++L(loop_64bytes): ++ LD_64(a1, 0) ++ addi.d a1, a1, 64 ++ ST_64(a4, 0) ++ ++ addi.d a4, a4, 64 ++ bne a1, a5, L(loop_64bytes) ++ ++L(al_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(al_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(al_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(al_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(al_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ st.d t0, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(al_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(al_less_4bytes) ++ ++ ld.w t0, a1, 0 ++ addi.d a1, a1, 4 ++ addi.d a2, a2, -4 ++ st.w t0, a4, 0 ++ addi.d a4, a4, 4 ++ ++L(al_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(al_less_2bytes) ++ ++ ld.h t0, a1, 0 ++ addi.d a1, a1, 2 ++ addi.d a2, a2, -2 ++ st.h t0, a4, 0 ++ addi.d a4, a4, 2 ++ ++L(al_less_2bytes): ++ beqz a2, L(al_less_1byte) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(al_less_1byte): ++ jr ra ++ ++L(unalign): ++ andi a5, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ sub.d t8, t8, a5 ++ slli.d a5, a5, 3 ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ slli.d a6, t8, 3 ++ srl.d a7, t0, a5 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ andi a3, a2, 0x3f ++ beq a3, a2, L(un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a3, a1, t0 ++ ++L(un_long_bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, 
t2, a5 ++ sll.d t2, t2, a6 ++ srl.d t7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t0, a7, t0 ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ ld.d t4, a1, 32 ++ ld.d t5, a1, 40 ++ ld.d t6, a1, 48 ++ ld.d a7, a1, 56 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a1, a1, 64 ++ ++ srl.d t0, t4, a5 ++ sll.d t4, t4, a6 ++ srl.d t1, t5, a5 ++ sll.d t5, t5, a6 ++ ++ srl.d t2, t6, a5 ++ sll.d t6, t6, a6 ++ sll.d t3, a7, a6 ++ srl.d a7, a7, a5 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, 32 ++ st.d t5, a4, 40 ++ st.d t6, a4, 48 ++ st.d t3, a4, 56 ++ ++ addi.d a4, a4, 64 ++ bne a3, a1, L(un_long_bytes) ++ ++L(un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(un_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ or t0, a7, t0 ++ srl.d a7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ srl.d t2, t0, a5 ++ sll.d t3, t0, a6 ++ sll.d t4, t1, a6 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ ++ srl.d a7, t1, a5 ++ st.d t3, a4, 0 ++ st.d t4, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(un_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ sll.d t1, t0, a6 ++ ++ or t2, a7, t1 ++ srl.d a7, t0, a5 ++ st.d t2, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(un_less_8bytes): ++ beqz a2, L(un_less_1byte) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, 0 ++ sll.d t0, t0, a6 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(un_less_4bytes) ++ ++ addi.d a2, a2, -4 ++ st.w a7, a4, 0 ++ addi.d a4, a4, 4 ++ srai.d a7, a7, 32 ++ ++L(un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(un_less_2bytes) ++ ++ addi.d a2, a2, -2 ++ st.h a7, a4, 0 ++ addi.d a4, a4, 2 ++ srai.d a7, a7, 16 ++ ++L(un_less_2bytes): ++ beqz a2, L(un_less_1byte) ++ st.b a7, a4, 0 ++ ++L(un_less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 36 ++ slli.d t2, a2, 3 ++ add.d a4, a0, a2 ++ sub.d t1, t1, t2 ++ add.d a1, a1, a2 ++ jr t1 ++ ++L(short_15_bytes): ++ ld.b t0, a1, -15 ++ st.b t0, a4, -15 ++L(short_14_bytes): ++ ld.b t0, a1, -14 ++ st.b t0, a4, -14 ++L(short_13_bytes): ++ ld.b t0, a1, -13 ++ st.b t0, a4, -13 ++L(short_12_bytes): ++ ld.b t0, a1, -12 ++ st.b t0, a4, -12 ++L(short_11_bytes): ++ ld.b t0, a1, -11 ++ st.b t0, a4, -11 ++L(short_10_bytes): ++ ld.b t0, a1, -10 ++ st.b t0, a4, -10 ++L(short_9_bytes): ++ ld.b t0, a1, -9 ++ st.b t0, a4, -9 ++L(short_8_bytes): ++ ld.b t0, a1, -8 ++ st.b t0, a4, -8 ++L(short_7_bytes): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(short_6_bytes): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(short_5_bytes): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(short_4_bytes): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(short_3_bytes): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(short_2_bytes): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(short_1_bytes): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ jr ra ++ ++L(copy_back): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_short_data) ++ ++ add.d a4, a0, a2 ++ add.d a1, a1, a2 ++ ++ andi a5, a4, 0x7 ++ andi a6, a1, 0x7 ++ beqz a5, L(back_check_align) ++ ++ sub.d 
a2, a2, a5 ++ sub.d a1, a1, a5 ++ sub.d a4, a4, a5 ++ ++ pcaddi t1, 18 ++ slli.d t3, a5, 3 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a4, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a4, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a4, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a4, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a4, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a4, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(back_check_align): ++ bne a5, a6, L(back_unalign) ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a5, a1, t0 ++ ++L(back_loop_64bytes): ++ LD_64(a1, -64) ++ addi.d a1, a1, -64 ++ ST_64(a4, -64) ++ ++ addi.d a4, a4, -64 ++ bne a1, a5, L(back_loop_64bytes) ++ ++L(back_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_less_32bytes) ++ ++ ld.d t0, a1, -32 ++ ld.d t1, a1, -24 ++ ld.d t2, a1, -16 ++ ld.d t3, a1, -8 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, -32 ++ st.d t1, a4, -24 ++ st.d t2, a4, -16 ++ st.d t3, a4, -8 ++ ++ addi.d a4, a4, -32 ++ ++L(back_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ ld.d t0, a1, -16 ++ ld.d t1, a1, -8 ++ ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ ++ st.d t0, a4, -16 ++ st.d t1, a4, -8 ++ addi.d a4, a4, -16 ++ ++L(back_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ addi.d a2, a2, -8 ++ addi.d a1, a1, -8 ++ ++ st.d t0, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(back_less_4bytes) ++ ++ ld.w t0, a1, -4 ++ addi.d a2, a2, -4 ++ addi.d a1, a1, -4 ++ ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ ++L(back_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_less_2bytes) ++ ++ ld.h t0, a1, -2 ++ addi.d a2, a2, -2 ++ addi.d a1, a1, -2 ++ ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ ++L(back_less_2bytes): ++ beqz a2, L(back_less_1byte) ++ ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(back_less_1byte): ++ jr ra ++ ++L(back_unalign): ++ andi t8, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ ++ sub.d a6, zero, t8 ++ ++ ld.d t0, a1, 0 ++ slli.d a6, a6, 3 ++ slli.d a5, t8, 3 ++ sll.d a7, t0, a6 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a3, a1, t0 ++ ++L(back_un_long_bytes): ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ sll.d t7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t0, t0, a7 ++ or t1, t1, t4 ++ or t2, t2, t5 ++ or t3, t3, t6 ++ ++ ld.d t4, a1, -40 ++ ld.d t5, a1, -48 ++ ld.d t6, a1, -56 ++ ld.d a7, a1, -64 ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a1, a1, -64 ++ ++ sll.d t0, t4, a6 ++ srl.d t4, t4, a5 ++ ++ sll.d t1, t5, a6 ++ srl.d t5, t5, a5 ++ ++ sll.d t2, t6, a6 ++ srl.d t6, t6, a5 ++ ++ srl.d t3, a7, a5 ++ sll.d a7, a7, a6 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, -40 ++ st.d t5, a4, -48 ++ st.d t6, a4, -56 ++ st.d t3, a4, -64 ++ ++ addi.d a4, a4, -64 ++ bne a3, a1, L(back_un_long_bytes) ++ ++L(back_un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_un_less_32bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ 
srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ or t0, a7, t0 ++ ++ sll.d a7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a4, a4, -32 ++ ++L(back_un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ++ addi.d a1, a1, -16 ++ addi.d a2, a2, -16 ++ ++ sll.d t2, t0, a6 ++ srl.d t3, t0, a5 ++ ++ srl.d t4, t1, a5 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ sll.d a7, t1, a6 ++ ++ st.d t3, a4, -8 ++ st.d t4, a4, -16 ++ ++ addi.d a4, a4, -16 ++ ++L(back_un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_un_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ ++ addi.d a1, a1, -8 ++ addi.d a2, a2, -8 ++ ++ srl.d t1, t0, a5 ++ or t2, a7, t1 ++ sll.d a7, t0, a6 ++ ++ st.d t2, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_un_less_8bytes): ++ beqz a2, L(back_end) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, -8 ++ srl.d t0, t0, a5 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(back_un_less_4bytes) ++ ++ srai.d t0, a7, 32 ++ addi.d a2, a2, -4 ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ slli.d a7, a7, 32 ++ ++L(back_un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_un_less_2bytes) ++ srai.d t0, a7, 48 ++ addi.d a2, a2, -2 ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ slli.d a7, a7, 16 ++L(back_un_less_2bytes): ++ beqz a2, L(back_un_less_1byte) ++ srai.d t0, a7, 56 ++ st.b t0, a4, -1 ++L(back_un_less_1byte): ++ jr ra ++ ++L(back_short_data): ++ pcaddi t1, 34 ++ slli.d t2, a2, 3 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.b t0, a1, 14 ++ st.b t0, a0, 14 ++ ld.b t0, a1, 13 ++ st.b t0, a0, 13 ++ ld.b t0, a1, 12 ++ st.b t0, a0, 12 ++ ld.b t0, a1, 11 ++ st.b t0, a0, 11 ++ ld.b t0, a1, 10 ++ st.b t0, a0, 10 ++ ld.b t0, a1, 9 ++ st.b t0, a0, 9 ++ ld.b t0, a1, 8 ++ st.b t0, a0, 8 ++ ld.b t0, a1, 7 ++ st.b t0, a0, 7 ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(back_end): ++ jr ra ++ ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++libc_hidden_builtin_def (MEMCPY_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +new file mode 100644 +index 00000000..4aae5bf8 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using Loongarch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +new file mode 100644 +index 00000000..6ebbe7a2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +new file mode 100644 +index 00000000..8e60a22d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -0,0 +1,247 @@ ++/* Optimized unaligned memcpy implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMCPY_NAME __memcpy_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMCPY_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ li.w a6, 16 ++ bge a6, a2, L(less_16bytes) ++ ++ li.w a6, 128 ++ blt a6, a2, L(long_bytes) ++ li.w a6, 64 ++ blt a6, a2, L(more_64bytes) ++ ++ li.w a6, 32 ++ blt a6, a2, L(more_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ jr ra ++ ++L(more_64bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ addi.d t8, t8, 0x8 ++ sub.d a7, a0, t8 ++ ++ ld.d t0, a1, 0 ++ sub.d a1, a1, a7 ++ st.d t0, a0, 0 ++ add.d a7, a7, a2 ++ addi.d a7, a7, -0x20 ++ ++L(loop_32): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ st.d t0, t8, 0 ++ st.d t1, t8, 8 ++ st.d t2, t8, 16 ++ st.d t3, t8, 24 ++ ++ addi.d t8, t8, 0x20 ++ addi.d a1, a1, 0x20 ++ addi.d a7, a7, -0x20 ++ blt zero, a7, L(loop_32) ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(more_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ beq a0, t8, L(start) ++ ld.d t0, a1, 0 ++ ++ addi.d t8, t8, 0x8 ++ st.d t0, a0, 0 ++ sub.d a7, a0, t8 ++ sub.d a1, a1, a7 ++ ++L(start): ++ addi.d a5, a3, -0x80 ++ blt a5, t8, L(align_end_proc) ++ ++L(loop_128): ++ LD_64(a1, 0) ++ ST_64(t8, 0) ++ LD_64(a1, 64) ++ addi.d a1, a1, 0x80 ++ ST_64(t8, 64) ++ addi.d t8, t8, 0x80 ++ bge a5, t8, L(loop_128) ++ ++L(align_end_proc): ++ sub.d a2, a3, t8 ++ pcaddi t1, 34 ++ andi t2, a2, 0x78 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, 112 ++ st.d t0, t8, 112 ++ ld.d t0, a1, 104 ++ st.d t0, t8, 104 ++ ld.d t0, a1, 96 ++ st.d t0, t8, 96 ++ ld.d t0, a1, 88 ++ st.d t0, t8, 88 ++ ld.d t0, a1, 80 ++ st.d t0, t8, 80 ++ ld.d t0, a1, 72 ++ st.d t0, t8, 72 ++ ld.d t0, a1, 64 ++ st.d t0, t8, 64 ++ ld.d t0, a1, 56 ++ st.d t0, t8, 56 ++ ld.d t0, a1, 48 ++ st.d t0, t8, 48 ++ ld.d t0, a1, 40 ++ st.d t0, t8, 40 ++ 
ld.d t0, a1, 32 ++ st.d t0, t8, 32 ++ ld.d t0, a1, 24 ++ st.d t0, t8, 24 ++ ld.d t0, a1, 16 ++ st.d t0, t8, 16 ++ ld.d t0, a1, 8 ++ st.d t0, t8, 8 ++ ld.d t0, a1, 0 ++ st.d t0, t8, 0 ++ ld.d t0, a4, -8 ++ st.d t0, a3, -8 ++ ++ jr ra ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy.c b/sysdeps/loongarch/lp64/multiarch/memcpy.c +new file mode 100644 +index 00000000..93b238ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memcpy __redirect_memcpy ++# include <string.h> ++# undef memcpy ++ ++# define SYMBOL_NAME memcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memcpy, memcpy, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +new file mode 100644 +index 00000000..5354f383 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +@@ -0,0 +1,20 @@ ++/* Optimized memmove_aligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* memmove_aligned is part of memcpy_aligned, see memcpy-aligned.S. */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +new file mode 100644 +index 00000000..ff68e7a2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +@@ -0,0 +1,287 @@ ++/* Optimized memmove implementation using Loongarch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMCPY_NAME ++# define MEMCPY_NAME __memcpy_lasx ++#endif ++ ++#ifndef MEMMOVE_NAME ++# define MEMMOVE_NAME __memmove_lasx ++#endif ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(copy_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ ++ beqz t0, L(less_16bytes) ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ ++ ++ vst vr1, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ld.w t0, a1, 0 ++ ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(move_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(move_long): ++ sub.d t2, a0, a1 ++ ++ bltu t2, a2, L(copy_back) ++L(copy_long): ++ andi t2, a0, 0x1f ++ addi.d a2, a2, -1 ++ sub.d t2, t0, t2 ++ ++ ++ xvld xr8, a1, 0 ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ add.d a5, a0, t2 ++ ++ andi a2, t3, 0xff ++ add.d a1, a1, t2 ++ beq a2, t3, L(lt256) ++ sub.d a6, a4, a2 ++ ++ addi.d a6, a6, -1 ++L(loop_256): ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ xvld xr2, a1, 64 ++ ++ xvld xr3, a1, 96 ++ xvld xr4, a1, 128 ++ xvld xr5, a1, 160 ++ xvld xr6, a1, 192 ++ ++ ++ xvld xr7, a1, 224 ++ addi.d a1, a1, 256 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ xvst xr4, a5, 128 ++ xvst xr5, a5, 160 ++ ++ xvst xr6, a5, 192 ++ xvst xr7, a5, 224 ++ addi.d a5, a5, 256 ++ bne a1, a6, L(loop_256) ++ ++L(lt256): ++ srli.d t2, a2, 7 ++ beqz t2, L(lt128) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ ++ xvld xr2, a1, 64 ++ xvld xr3, a1, 96 ++ addi.d a1, a1, 128 ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ ++ addi.d a5, a5, 128 ++L(lt128): ++ bltu a2, t1, L(lt64) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ addi.d a1, a1, 64 ++ addi.d a2, a2, -64 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ ++ addi.d a5, a5, 64 ++L(lt64): ++ bltu a2, t0, L(lt32) ++ xvld xr0, a1, 0 ++ xvst xr0, a5, 0 
++ ++L(lt32): ++ xvst xr8, a0, 0 ++ xvst xr9, a3, -32 ++ jr ra ++ nop ++ ++L(copy_back): ++ addi.d a3, a3, -1 ++ addi.d a2, a2, -2 ++ andi t2, a3, 0x1f ++ xvld xr8, a1, 0 ++ ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ sub.d a5, a3, t2 ++ sub.d a4, a4, t2 ++ ++ ++ andi a2, t3, 0xff ++ beq a2, t3, L(back_lt256) ++ add.d a6, a1, a2 ++ addi.d a6, a6, 2 ++ ++L(back_loop_256): ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ xvld xr3, a4, -129 ++ ++ xvld xr4, a4, -161 ++ xvld xr5, a4, -193 ++ xvld xr6, a4, -225 ++ xvld xr7, a4, -257 ++ ++ addi.d a4, a4, -256 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ ++ ++ xvst xr3, a5, -128 ++ xvst xr4, a5, -160 ++ xvst xr5, a5, -192 ++ xvst xr6, a5, -224 ++ ++ xvst xr7, a5, -256 ++ addi.d a5, a5, -256 ++ bne a4, a6, L(back_loop_256) ++L(back_lt256): ++ srli.d t2, a2, 7 ++ ++ beqz t2, L(back_lt128) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ ++ xvld xr3, a4, -129 ++ addi.d a2, a2, -128 ++ addi.d a4, a4, -128 ++ xvst xr0, a5, -32 ++ ++ ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ xvst xr3, a5, -128 ++ addi.d a5, a5, -128 ++ ++L(back_lt128): ++ blt a2, t1, L(back_lt64) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ addi.d a2, a2, -64 ++ ++ addi.d a4, a4, -64 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ addi.d a5, a5, -64 ++ ++L(back_lt64): ++ bltu a2, t0, L(back_lt32) ++ xvld xr0, a4, -33 ++ xvst xr0, a5, -32 ++L(back_lt32): ++ xvst xr8, a0, 0 ++ ++ ++ xvst xr9, a3, -31 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +new file mode 100644 +index 00000000..9e1502a7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -0,0 +1,534 @@ ++/* Optimized memmove implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCPY_NAME __memcpy_lsx ++# define MEMMOVE_NAME __memmove_lsx ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(copy_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++L(more_32bytes): ++ vld vr0, a1, 0 ++ vld vr1, a1, 16 ++ vld vr2, a4, -32 ++ ++ ++ vld vr3, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a0, 16 ++ vst vr2, a3, -32 ++ ++ vst vr3, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vldrepl.d vr0, a1, 0 ++ vldrepl.d vr1, a4, -8 ++ vstelm.d vr0, a0, 0, 0 ++ vstelm.d vr1, a3, -8, 0 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ vldrepl.w vr0, a1, 0 ++ ++ ++ vldrepl.w vr1, a4, -4 ++ vstelm.w vr0, a0, 0, 0 ++ vstelm.w vr1, a3, -4, 0 ++ jr ra ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vldrepl.h vr0, a1, 0 ++ vldrepl.h vr1, a4, -2 ++ ++ vstelm.h vr0, a0, 0, 0 ++ vstelm.h vr1, a3, -2, 0 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++ nop ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(move_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++ nop ++L(move_long): ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++ ++ ++L(copy_long): ++ vld vr2, a1, 0 ++ andi t0, a0, 0xf ++ sub.d t0, t6, t0 ++ add.d a1, a1, t0 ++ ++ sub.d a2, a2, t0 ++ andi t1, a1, 0xf ++ bnez t1, L(unaligned) ++ vld vr0, a1, 0 ++ ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ andi t2, a2, 0x7f ++ add.d a5, a0, t0 ++ ++ beq a2, t2, L(al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++ ++L(al_loop): ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ vld vr4, a1, 64 ++ ++ vld vr5, a1, 80 ++ vld vr6, a1, 96 ++ vld vr7, a1, 112 ++ vst vr0, a5, 0 ++ ++ vld vr0, a1, 128 ++ addi.d a1, a1, 128 ++ vst vr1, a5, 16 ++ vst vr2, a5, 32 ++ ++ vst vr3, a5, 48 ++ vst vr4, a5, 64 ++ vst vr5, a5, 80 ++ vst vr6, a5, 96 ++ ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(al_loop) ++L(al_less_128): ++ blt a2, t8, L(al_less_64) ++ ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ addi.d a2, a2, -64 ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 64 ++ addi.d a1, a1, 64 ++ vst vr1, a5, 16 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(al_less_64): ++ blt a2, t7, L(al_less_32) ++ ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -32 ++ vst vr0, a5, 0 ++ vld vr0, a1, 32 ++ ++ addi.d a1, a1, 32 ++ vst vr1, a5, 16 ++ addi.d a5, a5, 32 ++L(al_less_32): ++ blt a2, t6, L(al_less_16) ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 16 ++ addi.d a5, a5, 16 ++L(al_less_16): ++ vld vr1, a4, -16 ++ ++ vst vr0, a5, 0 ++ vst vr1, a3, -16 ++ jr ra ++ nop ++ ++ ++L(magic_num): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++L(unaligned): ++ pcaddi t2, -4 ++ bstrins.d a1, zero, 3, 0 ++ vld vr8, t2, 0 ++ vld vr0, a1, 0 ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ add.d a5, a0, t0 ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a1, a1, 32 ++ ++ ++ beq 
t2, a2, L(un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++L(un_loop): ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vld vr4, a1, 32 ++ vld vr5, a1, 48 ++ ++ vld vr6, a1, 64 ++ vld vr7, a1, 80 ++ vshuf.b vr8, vr1, vr0, vr9 ++ vld vr0, a1, 96 ++ ++ vst vr8, a5, 0 ++ vshuf.b vr8, vr2, vr1, vr9 ++ vld vr1, a1, 112 ++ vst vr8, a5, 16 ++ ++ ++ addi.d a1, a1, 128 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr4, vr3, vr9 ++ vst vr2, a5, 32 ++ ++ vshuf.b vr4, vr5, vr4, vr9 ++ vst vr3, a5, 48 ++ vshuf.b vr5, vr6, vr5, vr9 ++ vst vr4, a5, 64 ++ ++ vshuf.b vr6, vr7, vr6, vr9 ++ vst vr5, a5, 80 ++ vshuf.b vr7, vr0, vr7, vr9 ++ vst vr6, a5, 96 ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(un_loop) ++L(un_less_128): ++ blt a2, t8, L(un_less_64) ++ ++ ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vshuf.b vr4, vr1, vr0, vr9 ++ vld vr0, a1, 32 ++ ++ vst vr4, a5, 0 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr2, vr1, vr9 ++ vld vr1, a1, 48 ++ ++ addi.d a1, a1, 64 ++ vst vr4, a5, 16 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr0, vr3, vr9 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(un_less_64): ++ blt a2, t7, L(un_less_32) ++ ++ ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr0, a1, 0 ++ vst vr3, a5, 0 ++ addi.d a2, a2, -32 ++ ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr1, a1, 16 ++ addi.d a1, a1, 32 ++ vst vr3, a5, 16 ++ ++ addi.d a5, a5, 32 ++L(un_less_32): ++ blt a2, t6, L(un_less_16) ++ vshuf.b vr2, vr1, vr0, vr9 ++ vor.v vr0, vr1, vr1 ++ ++ vld vr1, a1, 0 ++ vst vr2, a5, 0 ++ addi.d a5, a5, 16 ++L(un_less_16): ++ vld vr2, a4, -16 ++ ++ ++ vshuf.b vr0, vr1, vr0, vr9 ++ vst vr0, a5, 0 ++ vst vr2, a3, -16 ++ jr ra ++ ++L(copy_back): ++ addi.d t0, a3, -1 ++ vld vr2, a4, -16 ++ andi t0, t0, 0xf ++ addi.d t0, t0, 1 ++ ++ sub.d a4, a4, t0 ++ sub.d a2, a2, t0 ++ andi t1, a4, 0xf ++ bnez t1, L(back_unaligned) ++ ++ vld vr0, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ andi t2, a2, 0x7f ++ ++ ++ sub.d a3, a3, t0 ++ beq t2, a2, L(back_al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ ++ sub.d a6, a4, t3 ++L(back_al_loop): ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ vld vr4, a4, -80 ++ vld vr5, a4, -96 ++ vld vr6, a4, -112 ++ vld vr7, a4, -128 ++ ++ vst vr0, a3, -16 ++ vld vr0, a4, -144 ++ addi.d a4, a4, -128 ++ vst vr1, a3, -32 ++ ++ ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ vst vr4, a3, -80 ++ vst vr5, a3, -96 ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_al_loop) ++ ++L(back_al_less_128): ++ blt a2, t8, L(back_al_less_64) ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ addi.d a2, a2, -64 ++ vst vr0, a3, -16 ++ vld vr0, a4, -80 ++ addi.d a4, a4, -64 ++ ++ ++ vst vr1, a3, -32 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_al_less_64): ++ blt a2, t7, L(back_al_less_32) ++ vld vr1, a4, -32 ++ addi.d a2, a2, -32 ++ vst vr0, a3, -16 ++ ++ vld vr0, a4, -48 ++ vst vr1, a3, -32 ++ addi.d a3, a3, -32 ++ addi.d a4, a4, -32 ++ ++L(back_al_less_32): ++ blt a2, t6, L(back_al_less_16) ++ vst vr0, a3, -16 ++ vld vr0, a4, -32 ++ addi.d a3, a3, -16 ++ ++ ++L(back_al_less_16): ++ vld vr1, a1, 0 ++ vst vr0, a3, -16 ++ vst vr1, a0, 0 ++ jr ra ++ ++L(magic_num_2): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++L(back_unaligned): ++ pcaddi t2, -4 ++ bstrins.d a4, zero, 3, 0 ++ vld vr8, t2, 0 ++ vld vr0, a4, 0 ++ ++ vld vr1, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ sub.d a3, a3, t0 ++ ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 
++ addi.d a4, a4, -16 ++ ++ beq t2, a2, L(back_un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ sub.d a6, a4, t3 ++ ++L(back_un_loop): ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vld vr4, a4, -48 ++ ++ vld vr5, a4, -64 ++ vld vr6, a4, -80 ++ vld vr7, a4, -96 ++ vshuf.b vr8, vr0, vr1, vr9 ++ ++ ++ vld vr0, a4, -112 ++ vst vr8, a3, -16 ++ vshuf.b vr8, vr1, vr2, vr9 ++ vld vr1, a4, -128 ++ ++ vst vr8, a3, -32 ++ addi.d a4, a4, -128 ++ vshuf.b vr2, vr2, vr3, vr9 ++ vshuf.b vr3, vr3, vr4, vr9 ++ ++ vst vr2, a3, -48 ++ vshuf.b vr4, vr4, vr5, vr9 ++ vst vr3, a3, -64 ++ vshuf.b vr5, vr5, vr6, vr9 ++ ++ vst vr4, a3, -80 ++ vshuf.b vr6, vr6, vr7, vr9 ++ vst vr5, a3, -96 ++ vshuf.b vr7, vr7, vr0, vr9 ++ ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_un_loop) ++ ++L(back_un_less_128): ++ blt a2, t8, L(back_un_less_64) ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vshuf.b vr4, vr0, vr1, vr9 ++ ++ vld vr0, a4, -48 ++ vst vr4, a3, -16 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr1, vr2, vr9 ++ ++ vld vr1, a4, -64 ++ addi.d a4, a4, -64 ++ vst vr4, a3, -32 ++ vshuf.b vr2, vr2, vr3, vr9 ++ ++ ++ vshuf.b vr3, vr3, vr0, vr9 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_un_less_64): ++ blt a2, t7, L(back_un_less_32) ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr0, a4, -16 ++ vst vr3, a3, -16 ++ ++ addi.d a2, a2, -32 ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr1, a4, -32 ++ addi.d a4, a4, -32 ++ ++ vst vr3, a3, -32 ++ addi.d a3, a3, -32 ++L(back_un_less_32): ++ blt a2, t6, L(back_un_less_16) ++ vshuf.b vr2, vr0, vr1, vr9 ++ ++ ++ vor.v vr0, vr1, vr1 ++ vld vr1, a4, -16 ++ vst vr2, a3, -16 ++ addi.d a3, a3, -16 ++ ++L(back_un_less_16): ++ vld vr2, a1, 0 ++ vshuf.b vr0, vr0, vr1, vr9 ++ vst vr0, a3, -16 ++ vst vr2, a0, 0 ++ ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +new file mode 100644 +index 00000000..90a64b6b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -0,0 +1,380 @@ ++/* Optimized memmove_unaligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMMOVE_NAME __memmove_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ beq a1, a0, L(less_1bytes) ++ move t8, a0 ++ ++ srai.d a6, a2, 4 ++ beqz a6, L(less_16bytes) ++ srai.d a6, a2, 6 ++ bnez a6, L(more_64bytes) ++ srai.d a6, a2, 5 ++ beqz a6, L(less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ sub.d a7, a0, a1 ++ bltu a7, a2, L(copy_backward) ++ ++L(copy_forward): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ beq a0, t8, L(all_align) ++ addi.d a0, a0, 0x8 ++ sub.d a7, t8, a0 ++ sub.d a1, a1, a7 ++ add.d a2, a7, a2 ++ ++L(start_unalign_proc): ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a1, -7 ++ st.b t0, a0, -7 ++ ld.b t0, a1, -6 ++ st.b t0, a0, -6 ++ ld.b t0, a1, -5 ++ st.b t0, a0, -5 ++ ld.b t0, a1, -4 ++ st.b t0, a0, -4 ++ ld.b t0, a1, -3 ++ st.b t0, a0, -3 ++ ld.b t0, a1, -2 ++ st.b t0, a0, -2 ++ ld.b t0, a1, -1 ++ st.b t0, a0, -1 ++L(start_over): ++ ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ LD_64(a1, 0) ++ ST_64(a0, 0) ++ LD_64(a1, 64) ++ ST_64(a0, 64) ++ ++ addi.d a0, a0, 0x80 ++ addi.d a1, a1, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ add.d a1, a1, t2 ++ add.d a0, a0, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, -120 ++ st.d t0, a0, -120 ++ ld.d t0, a1, -112 ++ st.d t0, a0, -112 ++ ld.d t0, a1, -104 ++ st.d t0, a0, -104 ++ ld.d t0, a1, -96 ++ st.d t0, a0, -96 ++ ld.d t0, a1, -88 ++ st.d t0, a0, -88 ++ ld.d t0, a1, -80 ++ st.d t0, a0, -80 ++ ld.d t0, a1, -72 ++ st.d t0, a0, -72 ++ ld.d t0, a1, -64 ++ st.d t0, a0, -64 ++ ld.d t0, a1, -56 ++ st.d t0, a0, -56 ++ ld.d t0, a1, -48 ++ st.d t0, a0, -48 ++ ld.d t0, a1, -40 ++ st.d t0, a0, -40 ++ ld.d t0, a1, -32 ++ st.d t0, a0, -32 ++ ld.d t0, a1, -24 ++ st.d 
t0, a0, -24 ++ ld.d t0, a1, -16 ++ st.d t0, a0, -16 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a4, -7 ++ st.b t0, a3, -7 ++ ld.b t0, a4, -6 ++ st.b t0, a3, -6 ++ ld.b t0, a4, -5 ++ st.b t0, a3, -5 ++ ld.b t0, a4, -4 ++ st.b t0, a3, -4 ++ ld.b t0, a4, -3 ++ st.b t0, a3, -3 ++ ld.b t0, a4, -2 ++ st.b t0, a3, -2 ++ ld.b t0, a4, -1 ++ st.b t0, a3, -1 ++L(end): ++ move a0, t8 ++ jr ra ++ ++L(all_align): ++ addi.d a1, a1, 0x8 ++ addi.d a0, a0, 0x8 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ addi.d a2, a2, -8 ++ b L(start_over) ++ ++L(all_align_back): ++ addi.d a4, a4, -0x8 ++ addi.d a3, a3, -0x8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ addi.d a2, a2, -8 ++ b L(start_over_back) ++ ++L(copy_backward): ++ move a5, a3 ++ srli.d a3, a3, 3 ++ slli.d a3, a3, 3 ++ beq a3, a5, L(all_align_back) ++ sub.d a7, a3, a5 ++ add.d a4, a4, a7 ++ add.d a2, a7, a2 ++ ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a4, 6 ++ st.b t0, a3, 6 ++ ld.b t0, a4, 5 ++ st.b t0, a3, 5 ++ ld.b t0, a4, 4 ++ st.b t0, a3, 4 ++ ld.b t0, a4, 3 ++ st.b t0, a3, 3 ++ ld.b t0, a4, 2 ++ st.b t0, a3, 2 ++ ld.b t0, a4, 1 ++ st.b t0, a3, 1 ++ ld.b t0, a4, 0 ++ st.b t0, a3, 0 ++L(start_over_back): ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc_back) ++ ++L(loop_less_back): ++ LD_64(a4, -64) ++ ST_64(a3, -64) ++ LD_64(a4, -128) ++ ST_64(a3, -128) ++ ++ addi.d a4, a4, -0x80 ++ addi.d a3, a3, -0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less_back) ++ ++L(end_unalign_proc_back): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ sub.d a4, a4, t2 ++ sub.d a3, a3, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a4, 112 ++ st.d t0, a3, 112 ++ ld.d t0, a4, 104 ++ st.d t0, a3, 104 ++ ld.d t0, a4, 96 ++ st.d t0, a3, 96 ++ ld.d t0, a4, 88 ++ st.d t0, a3, 88 ++ ld.d t0, a4, 80 ++ st.d t0, a3, 80 ++ ld.d t0, a4, 72 ++ st.d t0, a3, 72 ++ ld.d t0, a4, 64 ++ st.d t0, a3, 64 ++ ld.d t0, a4, 56 ++ st.d t0, a3, 56 ++ ld.d t0, a4, 48 ++ st.d t0, a3, 48 ++ ld.d t0, a4, 40 ++ st.d t0, a3, 40 ++ ld.d t0, a4, 32 ++ st.d t0, a3, 32 ++ ld.d t0, a4, 24 ++ st.d t0, a3, 24 ++ ld.d t0, a4, 16 ++ st.d t0, a3, 16 ++ ld.d t0, a4, 8 ++ st.d t0, a3, 8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ move a0, t8 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove.c b/sysdeps/loongarch/lp64/multiarch/memmove.c +new file mode 100644 +index 00000000..7e3ca4c4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove.c +@@ -0,0 +1,38 @@ ++/* Multiple versions of memmove. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memmove __redirect_memmove ++# include <string.h> ++# undef memmove ++ ++# define SYMBOL_NAME memmove ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memmove, __libc_memmove, ++ IFUNC_SELECTOR ()); ++strong_alias (__libc_memmove, memmove); ++ ++# ifdef SHARED ++__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove) ++ __attribute__ ((visibility ("hidden"))); ++# endif ++ ++#endif +-- +2.33.0 +
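The memcpy and memmove wrappers above pick one of the __memcpy_/__memmove_{aligned, unaligned, lsx, lasx} variants at load time through glibc's internal libc_ifunc_redirected machinery and the selector in ifunc-lasx.h (not shown in this hunk). As a rough stand-alone sketch of the same selection idea — not glibc's actual code — the capability bits can be read from the auxiliary vector; the HWCAP_LOONGARCH_* macros are assumed to come from the Linux kernel's <asm/hwcap.h> on LoongArch, and the lasx -> lsx -> unaligned -> aligned ordering simply mirrors the variants this patch adds:

/* Sketch only: report which memcpy variant an ifunc-style selector
   would pick on the running machine.  */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>   /* assumed to define HWCAP_LOONGARCH_{UAL,LSX,LASX} */

static const char *
pick_memcpy (unsigned long hwcap)
{
  if (hwcap & HWCAP_LOONGARCH_LASX)
    return "__memcpy_lasx";
  if (hwcap & HWCAP_LOONGARCH_LSX)
    return "__memcpy_lsx";
  if (hwcap & HWCAP_LOONGARCH_UAL)
    return "__memcpy_unaligned";
  return "__memcpy_aligned";
}

int
main (void)
{
  printf ("selected variant: %s\n", pick_memcpy (getauxval (AT_HWCAP)));
  return 0;
}

The selector in glibc returns function pointers to the hidden variants instead of names, and it reads the hwcap value cached by the dynamic loader (GLRO (dl_hwcap)) rather than calling getauxval.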
View file
_service:tar_scm:Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
Added
@@ -0,0 +1,706 @@ +From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 15 Aug 2023 09:08:11 +0800 +Subject: PATCH 06/29 Loongarch: Add ifunc support for strchr{aligned, lsx, + lasx} and strchrnul{aligned, lsx, lasx} + +These implementations improve the time to run strchr{nul} +microbenchmark in glibc as below: +strchr-lasx reduces the runtime about 50%-83% +strchr-lsx reduces the runtime about 30%-67% +strchr-aligned reduces the runtime about 10%-20% +strchrnul-lasx reduces the runtime about 50%-83% +strchrnul-lsx reduces the runtime about 36%-65% +strchrnul-aligned reduces the runtime about 6%-10% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++ + .../lp64/multiarch/ifunc-impl-list.c | 16 +++ + .../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++ + .../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++ + .../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++ + .../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++ + .../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++ + .../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++ + sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++ + 12 files changed, 581 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 76c506c9..110a8c5c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -3,5 +3,11 @@ sysdep_routines += \ + strlen-aligned \ + strlen-lsx \ + strlen-lasx \ ++ strchr-aligned \ ++ strchr-lsx \ ++ strchr-lasx \ ++ strchrnul-aligned \ ++ strchrnul-lsx \ ++ strchrnul-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 1a2a576f..c7164b45 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) + ) ++ ++ IFUNC_IMPL (i, name, strchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchrnul, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx) ++ 
IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +new file mode 100644 +index 00000000..4494db79 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +new file mode 100644 +index 00000000..8a925120 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchrnul ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +new file mode 100644 +index 00000000..5fb01806 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -0,0 +1,99 @@ ++/* Optimized strchr implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCHR_NAME __strchr_aligned ++#else ++# define STRCHR_NAME strchr ++#endif ++ ++LEAF(STRCHR_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ and t0, a5, a3 ++ and t1, a4, a3 ++ ++ ctz.d t0, t0 ++ ctz.d t1, t1 ++ srli.w t2, t0, 3 ++ sltu t3, t1, t0 ++ ++ ++ add.d a0, a0, t2 ++ masknez a0, a0, t3 ++ jr ra ++END(STRCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +new file mode 100644 +index 00000000..254402da +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -0,0 +1,91 @@ ++/* Optimized strchr implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lasx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ xvld xr0, a0, 0 ++ li.d t2, -1 ++ ++ xvreplgr2vr.b xr1, a1 ++ sll.d t1, t2, t1 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr3, xr0, 4 ++ vilvl.h vr0, vr3, vr0 ++ movfr2gr.s t0, fa0 ++ ++ orn t0, t0, t1 ++ bne t0, t2, L(end) ++ addi.d a0, a0, 32 ++ nop ++ ++ ++L(loop): ++ xvld xr0, a0, 0 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bcnez fcc0, L(loop_end) ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 64 ++ xvxor.v xr2, xr0, xr1 ++ ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ addi.d a0, a0, -32 ++ ++L(loop_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ++L(end): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplgr2vr.b vr0, t0 ++ xvpermi.q xr3, xr2, 1 ++ ++ vshuf.b vr0, vr3, vr2, vr0 ++ vpickve2gr.bu t0, vr0, 0 ++ masknez a0, a0, t0 ++#endif ++ jr ra ++ ++END(STRCHR) ++ ++libc_hidden_builtin_def(STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +new file mode 100644 +index 00000000..dae98b0a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -0,0 +1,73 @@ ++/* Optimized strlen implementation using loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lsx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0xf ++ bstrins.d a0, zero, 3, 0 ++ vld vr0, a0, 0 ++ li.d t2, -1 ++ ++ vreplgr2vr.b vr1, a1 ++ sll.d t3, t2, t1 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ext.w.h t0, t0 ++ orn t0, t0, t3 ++ ++ beq t0, t2, L(loop) ++L(found): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplve.b vr2, vr2, t0 ++ vpickve2gr.bu t1, vr2, 0 ++ masknez a0, a0, t1 ++#endif ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 16 ++ addi.d a0, a0, 16 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ++ b L(found) ++END(STRCHR) ++ ++libc_hidden_builtin_def (STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c +new file mode 100644 +index 00000000..404e97bd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strchr __redirect_strchr ++# include <string.h> ++# undef strchr ++ ++# define SYMBOL_NAME strchr ++# include "ifunc-strchr.h" ++ ++libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); ++weak_alias(strchr, index) ++# ifdef SHARED ++__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +new file mode 100644 +index 00000000..1c01a023 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized strchrnul implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCHRNUL_NAME __strchrnul_aligned ++#else ++# define STRCHRNUL_NAME __strchrnul ++#endif ++ ++LEAF(STRCHRNUL_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ ctz.d t0, t0 ++ srli.w t0, t0, 3 ++ ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRCHRNUL_NAME) ++ ++libc_hidden_builtin_def (STRCHRNUL_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +new file mode 100644 +index 00000000..d45495e4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STRCHR __strchrnul_lasx ++#define AS_STRCHRNUL ++#include "strchr-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +new file mode 100644 +index 00000000..07d793ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STRCHR __strchrnul_lsx ++#define AS_STRCHRNUL ++#include "strchr-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +new file mode 100644 +index 00000000..f3b8296e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strchrnul. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strchrnul __redirect_strchrnul ++# define __strchrnul __redirect___strchrnul ++# include <string.h> ++# undef __strchrnul ++# undef strchrnul ++ ++# define SYMBOL_NAME strchrnul ++# include "ifunc-strchrnul.h" ++ ++libc_ifunc_redirected (__redirect_strchrnul, __strchrnul, ++ IFUNC_SELECTOR ()); ++weak_alias (__strchrnul, strchrnul) ++# ifdef SHARED ++__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); ++# endif ++#endif +-- +2.33.0 +
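Both *-aligned variants above scan eight bytes per iteration with the classic word-at-a-time bit trick: a2 holds 0x0101010101010101, a3 holds the same value shifted left by 7 (0x8080808080808080), and the mul.d replicates the search character into every byte. A rough C rendering of the per-word tests performed in the loop (illustrative only, not the glibc code):

#include <stdint.h>

#define ONES  0x0101010101010101ULL   /* the constant built in a2 */
#define HIGHS 0x8080808080808080ULL   /* a3 = a2 << 7             */

/* Non-zero iff some byte of W is zero (the NUL test).  */
static inline uint64_t
has_zero_byte (uint64_t w)
{
  return (w - ONES) & ~w & HIGHS;
}

/* Non-zero iff some byte of W equals C; replicating C into every
   byte matches the mul.d of a1 by 0x0101...0101 in the assembly.  */
static inline uint64_t
has_char_byte (uint64_t w, unsigned char c)
{
  return has_zero_byte (w ^ (ONES * c));
}

The assembly keeps both masks, locates the lowest set 0x80 bit of each with ctz.d, and strchr returns NULL when the NUL comes before the match, while strchrnul simply returns the address of whichever comes first.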
View file
_service:tar_scm:Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
Added
@@ -0,0 +1,478 @@ +From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Thu, 21 Sep 2023 09:10:11 +0800 +Subject: PATCH 26/29 Revert "LoongArch: Add glibc.cpu.hwcap support." + +This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/Makefile | 4 - + sysdeps/loongarch/Versions | 5 -- + sysdeps/loongarch/cpu-tunables.c | 89 ------------------- + sysdeps/loongarch/dl-get-cpu-features.c | 25 ------ + sysdeps/loongarch/dl-machine.h | 27 +----- + sysdeps/loongarch/dl-tunables.list | 25 ------ + .../unix/sysv/linux/loongarch/cpu-features.c | 29 ------ + .../unix/sysv/linux/loongarch/cpu-features.h | 18 +--- + .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 ------------- + sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 ----- + .../unix/sysv/linux/loongarch/libc-start.c | 34 ------- + 11 files changed, 8 insertions(+), 329 deletions(-) + delete mode 100644 sysdeps/loongarch/Versions + delete mode 100644 sysdeps/loongarch/cpu-tunables.c + delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c + delete mode 100644 sysdeps/loongarch/dl-tunables.list + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c + +diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile +index 30a1f4a8..43d2f583 100644 +--- a/sysdeps/loongarch/Makefile ++++ b/sysdeps/loongarch/Makefile +@@ -6,10 +6,6 @@ ifeq ($(subdir),elf) + gen-as-const-headers += dl-link.sym + endif + +-ifeq ($(subdir),elf) +- sysdep-dl-routines += dl-get-cpu-features +-endif +- + # LoongArch's assembler also needs to know about PIC as it changes the + # definition of some assembler macros. + ASFLAGS-.os += $(pic-ccflag) +diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions +deleted file mode 100644 +index 33ae2cc0..00000000 +--- a/sysdeps/loongarch/Versions ++++ /dev/null +@@ -1,5 +0,0 @@ +-ld { +- GLIBC_PRIVATE { +- _dl_larch_get_cpu_features; +- } +-} +diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c +deleted file mode 100644 +index 8e9fab93..00000000 +--- a/sysdeps/loongarch/cpu-tunables.c ++++ /dev/null +@@ -1,89 +0,0 @@ +-/* LoongArch CPU feature tuning. +- This file is part of the GNU C Library. +- Copyright (C) 2023 Free Software Foundation, Inc. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- <http://www.gnu.org/licenses/>. */ +- +-# include <stdbool.h> +-# include <stdint.h> +-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. 
*/ +-# include <elf/dl-tunables.h> +-# include <string.h> +-# include <cpu-features.h> +-# include <ldsodefs.h> +-# include <sys/auxv.h> +- +-# define HWCAP_LOONGARCH_IFUNC \ +- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX) +- +-# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \ +- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ +- if (!memcmp (f, #name, len) && \ +- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \ +- { \ +- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \ +- break; \ +- } \ +- +-attribute_hidden +-void +-TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) +-{ +- const char *p = valp->strval; +- size_t len; +- unsigned long hwcap = 0; +- const char *c; +- +- do { +- for (c = p; *c != ','; c++) +- if (*c == '\0') +- break; +- +- len = c - p; +- +- switch(len) +- { +- default: +- _dl_fatal_printf ( +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- break; +- case 3: +- { +- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3); +- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3); +- _dl_fatal_printf ( +- "Some features are invalid or not supported on this machine!!\n" +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- } +- break; +- case 4: +- { +- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4); +- _dl_fatal_printf ( +- "Some features are invalid or not supported on this machine!!\n" +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- } +- break; +- } +- +- p += len + 1; +- } +- while (*c != '\0'); +- +- GLRO (dl_larch_cpu_features).hwcap &= hwcap; +-} +diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c +deleted file mode 100644 +index 7cd9bc15..00000000 +--- a/sysdeps/loongarch/dl-get-cpu-features.c ++++ /dev/null +@@ -1,25 +0,0 @@ +-/* Define _dl_larch_get_cpu_features. +- Copyright (C) 2023 Free Software Foundation, Inc. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- <https://www.gnu.org/licenses/>. */ +- +- +-#include <ldsodefs.h> +- +-const struct cpu_features * +-_dl_larch_get_cpu_features (void) +-{ +- return &GLRO(dl_larch_cpu_features); +-} +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index b395a928..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -29,8 +29,6 @@ + #include <dl-static-tls.h> + #include <dl-machine-rel.h> + +-#include <cpu-features.c> +- + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ + ".globl\t" __STRING (entry) "\n\t" \ +@@ -55,23 +53,6 @@ + #define ELF_MACHINE_NO_REL 1 + #define ELF_MACHINE_NO_RELA 0 + +-#define DL_PLATFORM_INIT dl_platform_init () +- +-static inline void __attribute__ ((unused)) +-dl_platform_init (void) +-{ +- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') +- /* Avoid an empty string which would disturb us. 
*/
+-    GLRO(dl_platform) = NULL;
+-
+-#ifdef SHARED
+- /* init_cpu_features has been called early from __libc_start_main in
+- static executable. */
+- init_cpu_features (&GLRO(dl_larch_cpu_features));
+-#endif
+-}
+-
+-
+ /* Return nonzero iff ELF header is compatible with the running host. */
+ static inline int
+ elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
+@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope,
+ if (profile != 0)
+ {
+ #if !defined __loongarch_soft_float
+- if (RTLD_SUPPORT_LASX)
++ if (SUPPORT_LASX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx;
+- else if (RTLD_SUPPORT_LSX)
++ else if (SUPPORT_LSX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx;
+ else
+ #endif
+@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope,
+ indicated by the offset on the stack, and then jump to
+ the resolved address. */
+ #if !defined __loongarch_soft_float
+- if (RTLD_SUPPORT_LASX)
++ if (SUPPORT_LASX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
+- else if (RTLD_SUPPORT_LSX)
++ else if (SUPPORT_LSX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
+ else
+ #endif
+diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
+deleted file mode 100644
+index 66b34275..00000000
+--- a/sysdeps/loongarch/dl-tunables.list
++++ /dev/null
+@@ -1,25 +0,0 @@
+-# LoongArch specific tunables.
+-# Copyright (C) 2023 Free Software Foundation, Inc.
+-# This file is part of the GNU C Library.
+-
+-# The GNU C Library is free software; you can redistribute it and/or
+-# modify it under the terms of the GNU Lesser General Public
+-# License as published by the Free Software Foundation; either
+-# version 2.1 of the License, or (at your option) any later version.
+-
+-# The GNU C Library is distributed in the hope that it will be useful,
+-# but WITHOUT ANY WARRANTY; without even the implied warranty of
+-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+-# Lesser General Public License for more details.
+-
+-# You should have received a copy of the GNU Lesser General Public
+-# License along with the GNU C Library; if not, see
+-# <http://www.gnu.org/licenses/>.
+-
+-glibc {
+- cpu {
+- hwcaps {
+- type: STRING
+- }
+- }
+-}
+diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
+deleted file mode 100644
+index 1290c4ce..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
++++ /dev/null
+@@ -1,29 +0,0 @@
+-/* Initialize CPU feature data. LoongArch64 version.
+- This file is part of the GNU C Library.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#include <cpu-features.h>
+-#include <elf/dl-hwcaps.h>
+-#include <elf/dl-tunables.h>
+-extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
+-
+-static inline void
+-init_cpu_features (struct cpu_features *cpu_features)
+-{
+- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
+- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+-}
+diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+index 450963ce..d1a280a5 100644
+--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+@@ -19,23 +19,13 @@
+ #ifndef _CPU_FEATURES_LOONGARCH64_H
+ #define _CPU_FEATURES_LOONGARCH64_H
+
+-#include <stdint.h>
+ #include <sys/auxv.h>
+
+-struct cpu_features
+- {
+- uint64_t hwcap;
+- };
++#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
++#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
++#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+
+-/* Get a pointer to the CPU features structure. */
+-extern const struct cpu_features *_dl_larch_get_cpu_features (void)
+- __attribute__ ((pure));
+-
+-#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
+-#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
+-#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
+-#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+-#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+ #define INIT_ARCH()
+
+ #endif /* _CPU_FEATURES_LOONGARCH64_H */
++
+diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
+deleted file mode 100644
+index 6217fda9..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
++++ /dev/null
+@@ -1,60 +0,0 @@
+-/* Data for LoongArch64 version of processor capability information.
+- Linux version.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-/* If anything should be added here check whether the size of each string
+- is still ok with the given array size.
+-
+- All the #ifdefs in the definitions are quite irritating but
+- necessary if we want to avoid duplicating the information. There
+- are three different modes:
+-
+- - PROCINFO_DECL is defined. This means we are only interested in
+- declarations.
+-
+- - PROCINFO_DECL is not defined:
+-
+- + if SHARED is defined the file is included in an array
+- initializer. The .element = { ... } syntax is needed.
+-
+- + if SHARED is not defined a normal array initialization is
+- needed.
+- */
+-
+-#ifndef PROCINFO_CLASS
+-# define PROCINFO_CLASS
+-#endif
+-
+-#if !IS_IN (ldconfig)
+-# if !defined PROCINFO_DECL && defined SHARED
+- ._dl_larch_cpu_features
+-# else
+-PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
+-# endif
+-# ifndef PROCINFO_DECL
+-= { }
+-# endif
+-# if !defined SHARED || defined PROCINFO_DECL
+-;
+-# else
+-,
+-# endif
+-#endif
+-
+-#undef PROCINFO_DECL
+-#undef PROCINFO_CLASS
+diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
+deleted file mode 100644
+index 455fd71a..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
++++ /dev/null
+@@ -1,21 +0,0 @@
+-/* Operating system support for run-time dynamic linker. LoongArch version.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#include <config.h>
+-#include <sysdeps/loongarch/cpu-tunables.c>
+-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
+diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
+deleted file mode 100644
+index f1346ece..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* Override csu/libc-start.c on LoongArch64.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#ifndef SHARED
+-
+-/* Mark symbols hidden in static PIE for early self relocation to work. */
+-# if BUILD_PIE_DEFAULT
+-# pragma GCC visibility push(hidden)
+-# endif
+-
+-# include <ldsodefs.h>
+-# include <cpu-features.c>
+-
+-extern struct cpu_features _dl_larch_cpu_features;
+-
+-# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
+-
+-#endif
+-#include <csu/libc-start.c>
+--
+2.33.0
+
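Note on the revert above (commentary, not part of the patch): once the tunable-based cpu_features plumbing is gone, both the ifunc selectors and the lazy-binding setup read the LSX/LASX bits straight from GLRO (dl_hwcap), so SUPPORT_LSX/SUPPORT_LASX and the former RTLD_SUPPORT_* macros collapse into one definition. Application code can make the same check through getauxval. A minimal sketch, assuming a LoongArch Linux system; the fallback bit values are the kernel UAPI hwcap bits and are only there so the example compiles elsewhere:

/* Minimal sketch: query the same AT_HWCAP bits that SUPPORT_LSX and
   SUPPORT_LASX test inside the dynamic loader.  The fallback values
   below mirror the LoongArch kernel's uapi/asm/hwcap.h and are only
   used if the system headers do not already provide them.  */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_LSX
# define HWCAP_LOONGARCH_LSX  (1 << 4)
#endif
#ifndef HWCAP_LOONGARCH_LASX
# define HWCAP_LOONGARCH_LASX (1 << 5)
#endif

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  printf ("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX) ? "yes" : "no");
  printf ("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
  return 0;
}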
View file
_service:tar_scm:elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
Added
@@ -0,0 +1,39 @@
+From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 9 Aug 2023 19:12:54 +0800
+Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
+ elf.h
+
+These reloc types are generated by GNU assembler >= 2.41 for relaxation
+support.
+
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
+Signed-off-by: Xi Ruoyao <xry111@xry111.site>
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ elf/elf.h | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/elf/elf.h b/elf/elf.h
+index 89fc8021..d623bdeb 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -4205,6 +4205,14 @@ enum
+ #define R_LARCH_TLS_GD_HI20 98
+ #define R_LARCH_32_PCREL 99
+ #define R_LARCH_RELAX 100
++#define R_LARCH_DELETE 101
++#define R_LARCH_ALIGN 102
++#define R_LARCH_PCREL20_S2 103
++#define R_LARCH_CFA 104
++#define R_LARCH_ADD6 105
++#define R_LARCH_SUB6 106
++#define R_LARCH_ADD_ULEB128 107
++#define R_LARCH_SUB_ULEB128 108
+
+ /* ARC specific declarations. */
+
+--
+2.33.0
+
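The new constants match the relocation numbers that binutils >= 2.41 emits for linker relaxation. As an illustration only (the helper name below is made up, not a glibc API), a readelf-style tool might name them with a table like this; the numeric values are copied from the hunk above:

#include <stdio.h>

/* Illustrative helper: name the LoongArch relaxation-related
   relocations added by this patch.  */
static const char *
larch_reloc_name (unsigned int r_type)
{
  switch (r_type)
    {
    case 101: return "R_LARCH_DELETE";
    case 102: return "R_LARCH_ALIGN";
    case 103: return "R_LARCH_PCREL20_S2";
    case 104: return "R_LARCH_CFA";
    case 105: return "R_LARCH_ADD6";
    case 106: return "R_LARCH_SUB6";
    case 107: return "R_LARCH_ADD_ULEB128";
    case 108: return "R_LARCH_SUB_ULEB128";
    default:  return "R_LARCH_<other>";
    }
}

int
main (void)
{
  for (unsigned int t = 101; t <= 108; t++)
    printf ("%3u  %s\n", t, larch_reloc_name (t));
  return 0;
}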
View file
_service:tar_scm:linux-Sync-Linux-6.6-elf.h.patch
Added
@@ -0,0 +1,48 @@
+From 6b3d687470b8f91bc6eb87e924fe97d4592b3aa5 Mon Sep 17 00:00:00 2001
+From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date: Tue, 31 Oct 2023 13:32:38 -0300
+Subject: [PATCH 29/29] linux: Sync Linux 6.6 elf.h
+
+It adds NT_X86_SHSTK (2fab02b25ae7cf5), NT_RISCV_CSR/NT_RISCV_VECTOR
+(9300f00439743c4), and NT_LOONGARCH_HW_BREAK/NT_LOONGARCH_HW_WATCH
+(1a69f7a161a78ae).
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ elf/elf.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/elf/elf.h b/elf/elf.h
+index 9c51073f..51633079 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -794,6 +794,7 @@ typedef struct
+ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+ #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
++#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+ #define NT_S390_TIMER 0x301 /* s390 timer register */
+ #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+@@ -832,6 +833,8 @@ typedef struct
+ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */
+ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */
+ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */
++#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
++#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */
+ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and
+ status registers. */
+@@ -841,6 +844,8 @@ typedef struct
+ SIMD Extension registers. */
+ #define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary
+ Translation registers. */
++#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
++#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+ /* Legal values for the note segment descriptor types for object files. */
+
+--
+2.33.0
+
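These note types mainly matter to tools that parse core-file notes or use PTRACE_GETREGSET. A hedged compatibility sketch, not part of the patch: the guards below let such a tool build against an elf.h that predates this sync, with the numeric values copied from the hunk above (Linux 6.6); the demo program is illustrative only.

#include <elf.h>
#include <stdio.h>

/* Fallbacks for headers that predate this sync; values as in Linux 6.6.  */
#ifndef NT_X86_SHSTK
# define NT_X86_SHSTK 0x204
#endif
#ifndef NT_RISCV_CSR
# define NT_RISCV_CSR 0x900
#endif
#ifndef NT_RISCV_VECTOR
# define NT_RISCV_VECTOR 0x901
#endif
#ifndef NT_LOONGARCH_HW_BREAK
# define NT_LOONGARCH_HW_BREAK 0xa05
#endif
#ifndef NT_LOONGARCH_HW_WATCH
# define NT_LOONGARCH_HW_WATCH 0xa06
#endif

int
main (void)
{
  printf ("NT_X86_SHSTK          = 0x%x\n", NT_X86_SHSTK);
  printf ("NT_RISCV_CSR          = 0x%x\n", NT_RISCV_CSR);
  printf ("NT_RISCV_VECTOR       = 0x%x\n", NT_RISCV_VECTOR);
  printf ("NT_LOONGARCH_HW_BREAK = 0x%x\n", NT_LOONGARCH_HW_BREAK);
  printf ("NT_LOONGARCH_HW_WATCH = 0x%x\n", NT_LOONGARCH_HW_WATCH);
  return 0;
}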