Changes of Revision 2 (glibc, project Mega:24.03)
_service:tar_scm:glibc.spec
Changed
@@ -48,10 +48,14 @@ %undefine with_valgrind %endif +%ifarch loongarch64 +%global ENABLE_RELOC 0 +%else %global ENABLE_RELOC 1 +%endif # Only some architectures have static PIE support -%define pie_arches %{ix86} x86_64 aarch64 +%define pie_arches %{ix86} x86_64 aarch64 loongarch64 %define enablekernel 3.2 %define target %{_target_cpu}-%{_vendor}-linux @@ -67,7 +71,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 22 +Release: 23 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -141,6 +145,36 @@ Patch53: sparc-Remove-unwind-information-from-signal-return-s.patch Patch54: arm-Remove-wrong-ldr-from-_dl_start_user-BZ-31339.patch Patch55: malloc-Use-__get_nprocs-on-arena_get2-BZ-30945.patch +Patch56: LoongArch-Redefine-macro-LEAF-ENTRY.patch +Patch57: LoongArch-Add-minuimum-binutils-required-version.patch +Patch58: Loongarch-Add-ifunc-support-and-add-different-versio.patch +Patch59: elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch +Patch60: LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch +Patch61: Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch +Patch62: Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch +Patch63: LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch +Patch64: LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch +Patch65: LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch +Patch66: LoongArch-Remove-support-code-for-old-linker-in-star.patch +Patch67: LoongArch-Micro-optimize-LD_PCREL.patch +Patch68: LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch +Patch69: LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch +Patch70: LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch +Patch71: LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch +Patch72: LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch +Patch73: LoongArch-Change-loongarch-to-LoongArch-in-comments.patch +Patch74: LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch +Patch75: LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch +Patch76: LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch +Patch77: LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch +Patch78: LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch +Patch79: LoongArch-Add-glibc.cpu.hwcap-support.patch +Patch80: Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch +Patch81: LoongArch-Unify-Register-Names.patch +Patch82: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch +Patch83: linux-Sync-Linux-6.6-elf.h.patch +Patch84: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch + Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch @@ -759,7 +793,9 @@ touch nscd.filelist touch nss_modules.filelist touch nss-devel.filelist +%ifnarch loongarch64 touch libnsl.filelist +%endif touch debugutils.filelist touch benchtests.filelist touch help.filelist @@ -818,7 +854,9 @@ -e '%{_prefix}/share' \ -e '/var/db/Makefile' \ -e '/libnss_.*\.so0-9.*$' \ +%ifnarch loongarch64 -e '/libnsl' \ +%endif -e 'glibc-benchtests' \ -e 'aux-cache' \ > glibc.filelist @@ -891,8 +929,10 @@ ############################################################################## # libnsl subpackage ############################################################################## +%ifnarch loongarch64 grep -E '/libnsl\.so\.0-9+$' master.filelist > libnsl.filelist test $(wc 
-l < libnsl.filelist) -eq 1 +%endif ############################################################################## # glibc debugutils sub-package @@ -1331,8 +1371,10 @@ %files -f nss-devel.filelist nss-devel +%ifnarch loongarch64 %files -f libnsl.filelist -n libnsl /%{_lib}/libnsl.so.1 +%endif %files -f debugutils.filelist debugutils @@ -1354,6 +1396,10 @@ %endif %changelog +* Thu Feb 29 2024 Peng Fan <fanpeng@loongson.cn> - 2.38-23 +- LoongArch: sync patch from glibc upstream +- Reduced kernel version requirements + * Fri Feb 23 Jingxiao Lu <lujingxiao@huawei.com> - 2.38-22 - malloc: Use __get_nprocs on arena_get2 (BZ 30945)
_service:tar_scm:Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch
Added
@@ -0,0 +1,40 @@ +From 2c8dfc45a8009e5110a9d2148b62d802e989fde7 Mon Sep 17 00:00:00 2001 +From: ticat_fp <fanpeng@loongson.cn> +Date: Thu, 29 Feb 2024 15:58:31 +0800 +Subject: PATCH Decrease value of arch_minimum_kernel with LoongArch + +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/configure | 2 +- + sysdeps/unix/sysv/linux/loongarch/configure.ac | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure +index 0d1159e9..851b2285 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure ++++ b/sysdeps/unix/sysv/linux/loongarch/configure +@@ -1,7 +1,7 @@ + # This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.19.0 + + libc_cv_loongarch_int_abi=no + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac +index 04e9150a..00815c2f 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac ++++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac +@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage + GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.19.0 + + libc_cv_loongarch_int_abi=no + AC_EGREP_CPP(4 8 8, __SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__ +-- +2.33.0 +
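This patch lowers arch_minimum_kernel for the LoongArch port from 5.19.0 to 4.19.0, so the built glibc accepts kernels as old as 4.19. glibc enforces that limit in the loader at startup and refuses to run ("FATAL: kernel too old") on anything older. As a rough, hypothetical user-space analogue of that version comparison (this is an illustration, not glibc code), the check amounts to:

/* Sketch: compare the running kernel version against the 4.19.0 minimum
   set by the patch above.  glibc performs the real check inside the
   dynamic loader; this standalone program only illustrates the idea.  */
#include <stdio.h>
#include <stdlib.h>
#include <sys/utsname.h>

int
main (void)
{
  struct utsname u;
  if (uname (&u) != 0)
    return EXIT_FAILURE;

  int maj = 0, min = 0, patch = 0;
  sscanf (u.release, "%d.%d.%d", &maj, &min, &patch);

  long running  = (maj * 1000L + min) * 1000L + patch;
  long required = (4 * 1000L + 19) * 1000L + 0;   /* 4.19.0 */

  if (running < required)
    {
      fprintf (stderr, "FATAL: kernel too old (%s < 4.19.0)\n", u.release);
      return EXIT_FAILURE;
    }
  printf ("kernel %s satisfies the 4.19.0 minimum\n", u.release);
  return EXIT_SUCCESS;
}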
_service:tar_scm:LoongArch-Add-glibc.cpu.hwcap-support.patch
Added
@@ -0,0 +1,499 @@ +From 8923e4e9c79e672fd6b3b89aba598a60d5c01211 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 15 Sep 2023 17:35:19 +0800 +Subject: PATCH 25/29 LoongArch: Add glibc.cpu.hwcap support. + +Key Points: +1. On lasx & lsx platforms, We must use _dl_runtime_{profile, resolve}_{lsx, lasx} + to save vector registers. +2. Via "tunables", users can choose str/mem_{lasx,lsx,unaligned} functions with + `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,...`. + Note: glibc.cpu.hwcaps doesn't affect _dl_runtime_{profile, resolve}_{lsx, lasx} + selection. + +Usage Notes: +1. Only valid inputs: LASX, LSX, UAL. Case-sensitive, comma-separated, no spaces. +2. Example: `export GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL` turns on LASX & UAL. + Unmentioned features turn off. With default ifunc: lasx > lsx > unaligned > + aligned > generic, effect is: lasx > unaligned > aligned > generic; lsx off. +3. Incorrect GLIBC_TUNABLES settings will show error messages. + For example: On lsx platforms, you cannot enable lasx features. If you do + that, you will get error messages. +4. Valid input examples: + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX: lasx > aligned > generic. + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL: lsx > unaligned > aligned > generic. + - GLIBC_TUNABLES=glibc.cpu.hwcaps=LASX,UAL,LASX,UAL,LSX,LASX,UAL: Repetitions + allowed but not recommended. Results in: lasx > lsx > unaligned > aligned > + generic. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/Makefile | 4 + + sysdeps/loongarch/Versions | 5 ++ + sysdeps/loongarch/cpu-tunables.c | 89 +++++++++++++++++++ + sysdeps/loongarch/dl-get-cpu-features.c | 25 ++++++ + sysdeps/loongarch/dl-machine.h | 27 +++++- + sysdeps/loongarch/dl-tunables.list | 25 ++++++ + .../unix/sysv/linux/loongarch/cpu-features.c | 29 ++++++ + .../unix/sysv/linux/loongarch/cpu-features.h | 18 +++- + .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 +++++++++++++ + sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 +++++ + .../unix/sysv/linux/loongarch/libc-start.c | 34 +++++++ + 11 files changed, 329 insertions(+), 8 deletions(-) + create mode 100644 sysdeps/loongarch/Versions + create mode 100644 sysdeps/loongarch/cpu-tunables.c + create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c + create mode 100644 sysdeps/loongarch/dl-tunables.list + create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c + create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c + +diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile +index 43d2f583..30a1f4a8 100644 +--- a/sysdeps/loongarch/Makefile ++++ b/sysdeps/loongarch/Makefile +@@ -6,6 +6,10 @@ ifeq ($(subdir),elf) + gen-as-const-headers += dl-link.sym + endif + ++ifeq ($(subdir),elf) ++ sysdep-dl-routines += dl-get-cpu-features ++endif ++ + # LoongArch's assembler also needs to know about PIC as it changes the + # definition of some assembler macros. 
+ ASFLAGS-.os += $(pic-ccflag) +diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions +new file mode 100644 +index 00000000..33ae2cc0 +--- /dev/null ++++ b/sysdeps/loongarch/Versions +@@ -0,0 +1,5 @@ ++ld { ++ GLIBC_PRIVATE { ++ _dl_larch_get_cpu_features; ++ } ++} +diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c +new file mode 100644 +index 00000000..8e9fab93 +--- /dev/null ++++ b/sysdeps/loongarch/cpu-tunables.c +@@ -0,0 +1,89 @@ ++/* LoongArch CPU feature tuning. ++ This file is part of the GNU C Library. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++# include <stdbool.h> ++# include <stdint.h> ++# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */ ++# include <elf/dl-tunables.h> ++# include <string.h> ++# include <cpu-features.h> ++# include <ldsodefs.h> ++# include <sys/auxv.h> ++ ++# define HWCAP_LOONGARCH_IFUNC \ ++ (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX) ++ ++# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \ ++ _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ ++ if (!memcmp (f, #name, len) && \ ++ (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \ ++ { \ ++ hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \ ++ break; \ ++ } \ ++ ++attribute_hidden ++void ++TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) ++{ ++ const char *p = valp->strval; ++ size_t len; ++ unsigned long hwcap = 0; ++ const char *c; ++ ++ do { ++ for (c = p; *c != ','; c++) ++ if (*c == '\0') ++ break; ++ ++ len = c - p; ++ ++ switch(len) ++ { ++ default: ++ _dl_fatal_printf ( ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ break; ++ case 3: ++ { ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3); ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3); ++ _dl_fatal_printf ( ++ "Some features are invalid or not supported on this machine!!\n" ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ } ++ break; ++ case 4: ++ { ++ CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4); ++ _dl_fatal_printf ( ++ "Some features are invalid or not supported on this machine!!\n" ++ "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" ++ ); ++ } ++ break; ++ } ++ ++ p += len + 1; ++ } ++ while (*c != '\0'); ++ ++ GLRO (dl_larch_cpu_features).hwcap &= hwcap; ++} +diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c +new file mode 100644 +index 00000000..7cd9bc15 +--- /dev/null ++++ b/sysdeps/loongarch/dl-get-cpu-features.c +@@ -0,0 +1,25 @@ ++/* Define _dl_larch_get_cpu_features. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++ ++#include <ldsodefs.h> ++ ++const struct cpu_features * ++_dl_larch_get_cpu_features (void) ++{ ++ return &GLRO(dl_larch_cpu_features); ++} +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 57913cef..b395a928 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -29,6 +29,8 @@ + #include <dl-static-tls.h> + #include <dl-machine-rel.h> + ++#include <cpu-features.c> ++ + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ + ".globl\t" __STRING (entry) "\n\t" \ +@@ -53,6 +55,23 @@ + #define ELF_MACHINE_NO_REL 1 + #define ELF_MACHINE_NO_RELA 0 + ++#define DL_PLATFORM_INIT dl_platform_init () ++ ++static inline void __attribute__ ((unused)) ++dl_platform_init (void) ++{ ++ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') ++ /* Avoid an empty string which would disturb us. */ ++ GLRO(dl_platform) = NULL; ++ ++#ifdef SHARED ++ /* init_cpu_features has been called early from __libc_start_main in ++ static executable. */ ++ init_cpu_features (&GLRO(dl_larch_cpu_features)); ++#endif ++} ++ ++ + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int + elf_machine_matches_host (const ElfW (Ehdr) *ehdr) +@@ -290,9 +309,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + if (profile != 0) + { + #if !defined __loongarch_soft_float +- if (SUPPORT_LASX) ++ if (RTLD_SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx; +- else if (SUPPORT_LSX) ++ else if (RTLD_SUPPORT_LSX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx; + else + #endif +@@ -310,9 +329,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + indicated by the offset on the stack, and then jump to + the resolved address. */ + #if !defined __loongarch_soft_float +- if (SUPPORT_LASX) ++ if (RTLD_SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx; +- else if (SUPPORT_LSX) ++ else if (RTLD_SUPPORT_LSX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lsx; + else + #endif +diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list +new file mode 100644 +index 00000000..66b34275 +--- /dev/null ++++ b/sysdeps/loongarch/dl-tunables.list +@@ -0,0 +1,25 @@ ++# LoongArch specific tunables. ++# Copyright (C) 2023 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. 
++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# <http://www.gnu.org/licenses/>. ++ ++glibc { ++ cpu { ++ hwcaps { ++ type: STRING ++ } ++ } ++} +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c +new file mode 100644 +index 00000000..1290c4ce +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c +@@ -0,0 +1,29 @@ ++/* Initialize CPU feature data. LoongArch64 version. ++ This file is part of the GNU C Library. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include <cpu-features.h> ++#include <elf/dl-hwcaps.h> ++#include <elf/dl-tunables.h> ++extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; ++ ++static inline void ++init_cpu_features (struct cpu_features *cpu_features) ++{ ++ GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap); ++ TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); ++} +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index d1a280a5..450963ce 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -19,13 +19,23 @@ + #ifndef _CPU_FEATURES_LOONGARCH64_H + #define _CPU_FEATURES_LOONGARCH64_H + ++#include <stdint.h> + #include <sys/auxv.h> + +-#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL) +-#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) +-#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) ++struct cpu_features ++ { ++ uint64_t hwcap; ++ }; + ++/* Get a pointer to the CPU features structure. */ ++extern const struct cpu_features *_dl_larch_get_cpu_features (void) ++ __attribute__ ((pure)); ++ ++#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL) ++#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX) ++#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX) ++#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) ++#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + #define INIT_ARCH() + + #endif /* _CPU_FEATURES_LOONGARCH64_H */ +- +diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c +new file mode 100644 +index 00000000..6217fda9 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c +@@ -0,0 +1,60 @@ ++/* Data for LoongArch64 version of processor capability information. 
++ Linux version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* If anything should be added here check whether the size of each string ++ is still ok with the given array size. ++ ++ All the #ifdefs in the definitions are quite irritating but ++ necessary if we want to avoid duplicating the information. There ++ are three different modes: ++ ++ - PROCINFO_DECL is defined. This means we are only interested in ++ declarations. ++ ++ - PROCINFO_DECL is not defined: ++ ++ + if SHARED is defined the file is included in an array ++ initializer. The .element = { ... } syntax is needed. ++ ++ + if SHARED is not defined a normal array initialization is ++ needed. ++ */ ++ ++#ifndef PROCINFO_CLASS ++# define PROCINFO_CLASS ++#endif ++ ++#if !IS_IN (ldconfig) ++# if !defined PROCINFO_DECL && defined SHARED ++ ._dl_larch_cpu_features ++# else ++PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features ++# endif ++# ifndef PROCINFO_DECL ++= { } ++# endif ++# if !defined SHARED || defined PROCINFO_DECL ++; ++# else ++, ++# endif ++#endif ++ ++#undef PROCINFO_DECL ++#undef PROCINFO_CLASS +diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c +new file mode 100644 +index 00000000..455fd71a +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c +@@ -0,0 +1,21 @@ ++/* Operating system support for run-time dynamic linker. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include <config.h> ++#include <sysdeps/loongarch/cpu-tunables.c> ++#include <sysdeps/unix/sysv/linux/dl-sysdep.c> +diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c +new file mode 100644 +index 00000000..f1346ece +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c +@@ -0,0 +1,34 @@ ++/* Override csu/libc-start.c on LoongArch64. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef SHARED ++ ++/* Mark symbols hidden in static PIE for early self relocation to work. */ ++# if BUILD_PIE_DEFAULT ++# pragma GCC visibility push(hidden) ++# endif ++ ++# include <ldsodefs.h> ++# include <cpu-features.c> ++ ++extern struct cpu_features _dl_larch_cpu_features; ++ ++# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features) ++ ++#endif ++#include <csu/libc-start.c> +-- +2.33.0 +
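The patch above ties the string/memory ifunc selection to the HWCAP bits reported by the kernel (UAL, LSX, LASX) and lets glibc.cpu.hwcaps mask them; per the commit message, GLIBC_TUNABLES=glibc.cpu.hwcaps=LSX,UAL keeps LSX and unaligned access but drops LASX. A small sketch for inspecting those bits from user space is shown below; it assumes the HWCAP_LOONGARCH_* macros are visible via <sys/auxv.h> on a LoongArch toolchain (the same names the patch uses), and the #ifdef guards keep it compilable on other targets:

/* Sketch: report the AT_HWCAP bits that the ifunc selectors above key on.
   HWCAP_LOONGARCH_* come from the LoongArch kernel/glibc headers; on other
   architectures the #ifdef blocks simply compile away.  */
#include <stdio.h>
#include <sys/auxv.h>

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

#ifdef HWCAP_LOONGARCH_UAL
  printf ("UAL  (unaligned access): %s\n",
          (hwcap & HWCAP_LOONGARCH_UAL) ? "yes" : "no");
#endif
#ifdef HWCAP_LOONGARCH_LSX
  printf ("LSX  (128-bit SIMD):     %s\n",
          (hwcap & HWCAP_LOONGARCH_LSX) ? "yes" : "no");
#endif
#ifdef HWCAP_LOONGARCH_LASX
  printf ("LASX (256-bit SIMD):     %s\n",
          (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
#endif
  (void) hwcap;
  return 0;
}

Note that, as the commit message states, the tunable can only switch features off relative to what the hardware reports; it cannot enable LASX on an LSX-only machine.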
_service:tar_scm:LoongArch-Add-ifunc-support-for-memchr-aligned-lsx-l.patch
Added
@@ -0,0 +1,485 @@ +From 3ee56bbc56faa7b85a6513340db4a4fdd6ce709d Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:36 +0800 +Subject: PATCH 15/29 LoongArch: Add ifunc support for memchr{aligned, lsx, + lasx} + +According to glibc memchr microbenchmark, this implementation could reduce +the runtime as following: + +Name Percent of runtime reduced +memchr-lasx 37%-83% +memchr-lsx 30%-66% +memchr-aligned 0%-15% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 7 ++ + .../loongarch/lp64/multiarch/ifunc-memchr.h | 40 ++++++ + .../loongarch/lp64/multiarch/memchr-aligned.S | 95 ++++++++++++++ + .../loongarch/lp64/multiarch/memchr-lasx.S | 117 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memchr.c | 37 ++++++ + 7 files changed, 401 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 64416b02..2f4802cf 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -24,5 +24,8 @@ sysdep_routines += \ + rawmemchr-aligned \ + rawmemchr-lsx \ + rawmemchr-lasx \ ++ memchr-aligned \ ++ memchr-lsx \ ++ memchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 3db9af14..a567b9cf 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) + ) + ++ IFUNC_IMPL (i, name, memchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +new file mode 100644 +index 00000000..9060ccd5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +new file mode 100644 +index 00000000..81d0d004 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized memchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCHR_NAME __memchr_aligned ++#else ++# define MEMCHR_NAME memchr ++#endif ++ ++LEAF(MEMCHR_NAME, 6) ++ beqz a2, L(out) ++ andi t1, a0, 0x7 ++ add.d a5, a0, a2 ++ bstrins.d a0, zero, 2, 0 ++ ++ ld.d t0, a0, 0 ++ bstrins.d a1, a1, 15, 8 ++ lu12i.w a3, 0x01010 ++ slli.d t2, t1, 03 ++ ++ bstrins.d a1, a1, 31, 16 ++ ori a3, a3, 0x101 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a3, a3, 63, 32 ++ sll.d t2, t7, t2 ++ xor t0, t0, a1 ++ ++ ++ addi.d a6, a5, -1 ++ slli.d a4, a3, 7 ++ sub.d t1, t8, t1 ++ orn t0, t0, t2 ++ ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ bstrins.d a6, zero, 2, 0 ++ and t0, t2, t3 ++ ++ bgeu t1, a2, L(end) ++L(loop): ++ bnez t0, L(found) ++ ld.d t1, a0, 8 ++ xor t0, t1, a1 ++ ++ addi.d a0, a0, 8 ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ and t0, t2, t3 ++ ++ ++ bne a0, a6, L(loop) ++L(end): ++ sub.d t1, a5, a6 ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ ++ sltu t1, t0, t1 ++ add.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++L(found): ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++END(MEMCHR_NAME) ++ ++libc_hidden_builtin_def (MEMCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +new file mode 100644 +index 00000000..a26cdf48 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +@@ -0,0 +1,117 @@ ++/* Optimized memchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lasx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x3f ++ bstrins.d a0, zero, 5, 0 ++ ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ li.d t1, -1 ++ li.d t2, 64 ++ ++ xvreplgr2vr.b xr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ ++ ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ movfr2gr.d t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 5, 0 ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ beq a0, a4, L(out) ++ ++ ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(found): ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.d t1, t1, t2 ++ and t0, t0, t1 ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +new file mode 100644 +index 00000000..a73ecd25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +@@ -0,0 +1,102 @@ ++/* Optimized memchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lsx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ li.d t1, -1 ++ li.d t2, 32 ++ ++ vreplgr2vr.b vr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 4, 0 ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ beq a0, a4, L(out) ++ ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(found): ++ ctz.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.w t1, t1, t2 ++ and t0, t0, t1 ++ ctz.w t1, t0 ++ ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c +new file mode 100644 +index 00000000..059479c0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memchr __redirect_memchr ++# include <string.h> ++# undef memchr ++ ++# define SYMBOL_NAME memchr ++# include "ifunc-memchr.h" ++ ++libc_ifunc_redirected (__redirect_memchr, memchr, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); ++# endif ++ ++#endif +-- +2.33.0 +
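The quoted 0%-83% runtime reductions come from glibc's memchr microbenchmark. For a quick spot check outside the glibc build tree, a rough standalone timing loop such as the one below can be used; it is not the benchtests harness, and the buffer size, iteration count, and match position are arbitrary choices:

/* Rough standalone timing sketch for memchr.  Not glibc's benchtests
   harness; it only illustrates the kind of measurement the quoted
   numbers are based on.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int
main (void)
{
  enum { SIZE = 1 << 20, ITERS = 2000 };
  char *buf = malloc (SIZE);
  if (buf == NULL)
    return 1;
  memset (buf, 'a', SIZE);
  buf[SIZE - 1] = 'z';          /* place the match at the very end */

  struct timespec t0, t1;
  clock_gettime (CLOCK_MONOTONIC, &t0);

  void *found = NULL;
  for (int i = 0; i < ITERS; i++)
    found = memchr (buf, 'z', SIZE);

  clock_gettime (CLOCK_MONOTONIC, &t1);
  double ns = (t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec);

  printf ("memchr found %p, %.1f ns per call\n", found, ns / ITERS);
  free (buf);
  return 0;
}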
_service:tar_scm:LoongArch-Add-ifunc-support-for-memcmp-aligned-lsx-l.patch
Added
@@ -0,0 +1,946 @@ +From 60f4bbd1eec528ba8df044ae6b3091f6337a7fcc Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:39 +0800 +Subject: PATCH 18/29 LoongArch: Add ifunc support for memcmp{aligned, lsx, + lasx} + +According to glibc memcmp microbenchmark test results(Add generic +memcmp), this implementation have performance improvement +except the length is less than 3, details as below: + +Name Percent of time reduced +memcmp-lasx 16%-74% +memcmp-lsx 20%-50% +memcmp-aligned 5%-20% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +++ + .../loongarch/lp64/multiarch/memcmp-aligned.S | 292 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++++++++++ + sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +++ + 7 files changed, 861 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 216886c5..360a6718 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -34,5 +34,8 @@ sysdep_routines += \ + memset-unaligned \ + memset-lsx \ + memset-lasx \ ++ memcmp-aligned \ ++ memcmp-lsx \ ++ memcmp-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 37f60dde..e397d58c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -127,5 +127,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) + ) + ++ IFUNC_IMPL (i, name, memcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx) ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +new file mode 100644 +index 00000000..04adc2e5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +@@ -0,0 +1,40 @@ ++/* Common definition for memcmp ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +new file mode 100644 +index 00000000..14a7caa9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +@@ -0,0 +1,292 @@ ++/* Optimized memcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCMP_NAME __memcmp_aligned ++#else ++# define MEMCMP_NAME memcmp ++#endif ++ ++LEAF(MEMCMP_NAME, 6) ++ beqz a2, L(ret) ++ andi a4, a1, 0x7 ++ andi a3, a0, 0x7 ++ sltu a5, a4, a3 ++ ++ xor t0, a0, a1 ++ li.w t8, 8 ++ maskeqz t0, t0, a5 ++ li.w t7, -1 ++ ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ ++ xor a0, a0, a3 ++ xor a1, a1, a4 ++ ld.d t2, a0, 0 ++ ld.d t1, a1, 0 ++ ++ slli.d t3, a3, 3 ++ slli.d t4, a4, 3 ++ sub.d a6, t3, t4 ++ srl.d t1, t1, t4 ++ ++ srl.d t0, t2, t3 ++ srl.d t5, t7, t4 ++ sub.d t6, t0, t1 ++ and t6, t6, t5 ++ ++ sub.d t5, t8, a4 ++ bnez t6, L(first_out) ++ bgeu t5, a2, L(ret) ++ sub.d a2, a2, t5 ++ ++ bnez a6, L(unaligned) ++ blt a2, t8, L(al_less_8bytes) ++ andi t1, a2, 31 ++ beq t1, a2, L(al_less_32bytes) ++ ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ move a2, t1 ++ ++L(al_loop): ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ld.d t4, a0, 24 ++ ++ ld.d t5, a1, 24 ++ ld.d t6, a0, 32 ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ ++ addi.d a1, a1, 32 ++ bne t0, t1, L(out1) ++ bne t2, t3, L(out2) ++ bne t4, t5, L(out3) ++ ++ bne t6, t7, L(out4) ++ bne a0, a4, L(al_loop) ++ ++L(al_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(al_less_16bytes) ++ ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ bne t0, t1, L(out1) ++ ++ bne t2, t3, L(out2) ++ ++L(al_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(al_less_8bytes) ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ bne t0, t1, L(out1) ++ ++L(al_less_8bytes): ++ beqz a2, L(ret) ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ slli.d t2, a2, 3 ++ sll.d t2, t7, t2 ++ sub.d t3, t0, t1 ++ ++ andn t6, t3, t2 ++ bnez t6, L(count_diff) ++ ++L(ret): ++ move a0, zero ++ jr ra ++ ++L(out4): ++ move t0, t6 ++ move t1, t7 ++ sub.d t6, t6, t7 ++ b L(count_diff) ++ ++L(out3): ++ move t0, t4 ++ move t1, t5 ++ sub.d t6, t4, t5 ++ b L(count_diff) ++ ++L(out2): ++ move t0, t2 ++ move t1, t3 ++L(out1): ++ sub.d t6, t0, t1 ++ b L(count_diff) ++ ++L(first_out): ++ slli.d t4, a2, 3 ++ slt t3, a2, t5 ++ sll.d t4, t7, t4 ++ maskeqz t4, t4, t3 ++ ++ andn t6, t6, t4 ++ ++L(count_diff): ++ ctz.d t2, t6 ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ sub.d t3, t1, t0 ++ masknez t2, t2, a5 ++ maskeqz t3, t3, a5 ++ or a0, t2, t3 ++ ++ jr ra ++ ++L(unaligned): ++ sub.d a7, zero, a6 ++ srl.d t0, t2, a6 ++ blt a2, t8, L(un_less_8bytes) ++ ++ andi t1, a2, 31 ++ beq t1, a2, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ ++ move a2, t1 ++ ++L(un_loop): ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t4, a0, 16 ++ ++ ld.d t3, a1, 16 ++ ld.d t6, a0, 24 ++ ld.d t5, a1, 24 ++ ld.d t8, a0, 32 ++ ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ sll.d a3, t2, a7 ++ ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ srl.d t0, t4, a6 ++ sll.d a3, t6, a7 ++ ++ or t4, a3, t0 ++ bne t4, t5, L(out3) ++ srl.d t0, t6, a6 ++ sll.d a3, t8, a7 ++ ++ or t6, t0, a3 ++ bne t6, t7, L(out4) ++ srl.d t0, t8, a6 ++ bne a0, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(un_less_16bytes) ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ++ ld.d t4, a0, 
16 ++ ld.d t3, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ addi.d a2, a2, -16 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ ++ srl.d t0, t4, a6 ++ ++L(un_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(un_less_8bytes) ++ ld.d t2, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ ++L(un_less_8bytes): ++ beqz a2, L(ret) ++ andi a7, a7, 63 ++ slli.d a4, a2, 3 ++ bgeu a7, a4, L(last_cmp) ++ ++ ld.d t2, a0, 8 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ ++L(last_cmp): ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ sll.d t2, t7, a4 ++ sub.d t3, t0, t1 ++ andn t6, t3, t2 ++ ++ bnez t6, L(count_diff) ++ move a0, zero ++ jr ra ++END(MEMCMP_NAME) ++ ++libc_hidden_builtin_def (MEMCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +new file mode 100644 +index 00000000..3151a179 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +@@ -0,0 +1,207 @@ ++/* Optimized memcmp implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCMP __memcmp_lasx ++ ++LEAF(MEMCMP, 6) ++ li.d t2, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t2, a2, L(less32) ++ ++ li.d t1, 160 ++ bgeu a2, t1, L(make_aligned) ++L(loop32): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ xvseq.b xr2, xr0, xr1 ++ ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++L(last_bytes): ++ bltu t2, a2, L(loop32) ++ xvld xr0, a3, -32 ++ ++ ++ xvld xr1, a4, -32 ++ xvseq.b xr2, xr0, xr1 ++L(end): ++ xvmsknz.b xr2, xr2 ++ xvpermi.q xr4, xr0, 1 ++ ++ xvpickve.w xr3, xr2, 4 ++ xvpermi.q xr5, xr1, 1 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr2, t0 ++ vshuf.b vr0, vr4, vr0, vr2 ++ vshuf.b vr1, vr5, vr1, vr2 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(less32): ++ srli.d t0, a2, 4 ++ beqz t0, L(less16) ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ ++ vld vr2, a3, -16 ++ vld vr3, a4, -16 ++L(short_ret): ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr5, vr2, vr3 ++ ++ vmsknz.b vr4, vr4 ++ vmsknz.b vr5, vr5 ++ vilvl.h vr4, vr5, vr4 ++ movfr2gr.s t0, fa4 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr4, t0 ++ vshuf.b vr0, vr2, vr0, vr4 ++ vshuf.b vr1, vr3, vr1, vr4 ++ ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(less16): ++ srli.d t0, a2, 3 ++ beqz t0, L(less8) ++ vldrepl.d vr0, a0, 0 ++ vldrepl.d vr1, a1, 0 ++ ++ vldrepl.d vr2, a3, -8 ++ vldrepl.d vr3, a4, -8 ++ b L(short_ret) ++ nop ++ ++L(less8): ++ srli.d t0, a2, 2 ++ beqz t0, L(less4) ++ vldrepl.w vr0, a0, 0 ++ vldrepl.w vr1, a1, 0 ++ ++ ++ vldrepl.w vr2, a3, -4 ++ vldrepl.w vr3, a4, -4 ++ b L(short_ret) ++ nop ++ ++L(less4): ++ srli.d t0, a2, 1 ++ beqz t0, L(less2) ++ vldrepl.h vr0, a0, 0 ++ vldrepl.h vr1, a1, 0 ++ ++ vldrepl.h vr2, a3, -2 ++ vldrepl.h vr3, a4, -2 ++ b L(short_ret) ++ nop ++ ++L(less2): ++ beqz a2, L(ret0) ++ ld.bu t0, a0, 0 ++ ld.bu t1, a1, 0 ++ sub.d a0, t0, t1 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(make_aligned): ++ xvld xr0, a0, 0 ++ ++ xvld xr1, a1, 0 ++ xvseq.b xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++ ++ andi t0, a0, 0x1f ++ sub.d t0, t2, t0 ++ sub.d t1, a2, t0 ++ add.d a0, a0, t0 ++ ++ add.d a1, a1, t0 ++ andi a2, t1, 0x3f ++ sub.d t0, t1, a2 ++ add.d a5, a0, t0 ++ ++ ++L(loop_align): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ xvld xr2, a0, 32 ++ xvld xr3, a1, 32 ++ ++ xvseq.b xr0, xr0, xr1 ++ xvseq.b xr1, xr2, xr3 ++ xvmin.bu xr2, xr1, xr0 ++ xvsetanyeqz.b fcc0, xr2 ++ ++ bcnez fcc0, L(pair_end) ++ addi.d a0, a0, 64 ++ addi.d a1, a1, 64 ++ bne a0, a5, L(loop_align) ++ ++ bnez a2, L(last_bytes) ++ move a0, zero ++ jr ra ++ nop ++ ++ ++L(pair_end): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ cto.d t0, t0 ++ ldx.bu t1, a0, t0 ++ ldx.bu t2, a1, t0 ++ sub.d a0, t1, t2 ++ ++ jr ra ++END(MEMCMP) ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +new file mode 100644 +index 00000000..38a50a4c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +@@ -0,0 +1,269 @@ ++/* Optimized memcmp implementation using LoongArch LSX instructions. 
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define MEMCMP __memcmp_lsx ++ ++LEAF(MEMCMP, 6) ++ beqz a2, L(out) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr5, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ xor a1, a1, a4 ++ ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ vreplgr2vr.b vr3, a3 ++ ++ sub.d t1, t0, a3 ++ vadd.b vr3, vr3, vr5 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ ++ vseq.b vr4, vr0, vr1 ++ bgeu t1, a2, L(al_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ ++ sub.d t1, a2, t1 ++ andi a2, t1, 31 ++ beq a2, t1, L(al_less_32bytes) ++ sub.d t2, t1, a2 ++ ++ add.d a4, a0, t2 ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vld vr2, a0, 32 ++ ++ vld vr3, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ vseq.b vr4, vr0, vr1 ++ ++ ++ vseq.b vr6, vr2, vr3 ++ vand.v vr6, vr4, vr6 ++ vsetanyeqz.b fcc0, vr6 ++ bcnez fcc0, L(al_pair_end) ++ ++ bne a0, a4, L(al_loop) ++L(al_less_32bytes): ++ bgeu t0, a2, L(al_less_16bytes) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ ++ vld vr2, a0, 32 ++ vld vr3, a1, 32 ++ addi.d a2, a2, -16 ++ vreplgr2vr.b vr6, a2 ++ ++ vslt.b vr5, vr5, vr6 ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr6, vr2, vr3 ++ vorn.v vr6, vr6, vr5 ++ ++ ++L(al_pair_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ vnori.b vr4, vr6, 0 ++ vfrstpi.b vr4, vr4, 0 ++ ++ vshuf.b vr0, vr2, vr2, vr4 ++ vshuf.b vr1, vr3, vr3, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ nop ++ ++L(al_less_16bytes): ++ beqz a2, L(out) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vseq.b vr4, vr0, vr1 ++ ++ ++L(al_end): ++ vreplgr2vr.b vr6, a2 ++ vslt.b vr5, vr5, vr6 ++ vorn.v vr4, vr4, vr5 ++ nop ++ ++L(al_found): ++ vnori.b vr4, vr4, 0 ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++ ++L(unaligned): ++ xor t2, a0, a1 ++ sltu a5, a3, a4 ++ masknez t2, t2, a5 ++ xor a0, a0, t2 ++ ++ xor a1, a1, t2 ++ andi a3, a0, 0xf ++ andi a4, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ xor a1, a1, a4 ++ vld vr4, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ ++ vreplgr2vr.b vr2, a4 ++ sub.d a6, a4, a3 ++ sub.d t1, t0, a4 ++ sub.d t2, t0, a6 ++ ++ ++ vadd.b vr2, vr2, vr5 ++ vreplgr2vr.b vr6, t2 ++ vadd.b vr6, vr6, vr5 ++ vshuf.b vr0, vr4, vr4, vr6 ++ ++ vshuf.b vr1, vr2, vr1, vr2 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vseq.b vr7, vr0, vr1 ++ bgeu t1, a2, L(un_end) ++ ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, 
L(un_found) ++ sub.d a2, a2, t1 ++ andi t1, a2, 31 ++ ++ beq a2, t1, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ move a2, t1 ++ add.d a4, a1, t2 ++ ++ ++L(un_loop): ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ vld vr3, a1, 32 ++ addi.d a1, a1, 32 ++ ++ addi.d a0, a0, 32 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vld vr4, a0, 0 ++ vseq.b vr7, vr0, vr1 ++ ++ vshuf.b vr2, vr4, vr2, vr6 ++ vseq.b vr8, vr2, vr3 ++ vand.v vr8, vr7, vr8 ++ vsetanyeqz.b fcc0, vr8 ++ ++ bcnez fcc0, L(un_pair_end) ++ bne a1, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ bltu a2, t0, L(un_less_16bytes) ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vor.v vr4, vr2, vr2 ++ ++ vseq.b vr7, vr0, vr1 ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++L(un_less_16bytes): ++ beqz a2, L(out) ++ vld vr1, a1, 16 ++ bgeu a6, a2, 1f ++ ++ vld vr2, a0, 16 ++1: ++ vshuf.b vr0, vr2, vr4, vr6 ++ vseq.b vr7, vr0, vr1 ++L(un_end): ++ vreplgr2vr.b vr3, a2 ++ ++ ++ vslt.b vr3, vr5, vr3 ++ vorn.v vr7, vr7, vr3 ++ ++L(un_found): ++ vnori.b vr7, vr7, 0 ++ vfrstpi.b vr7, vr7, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr7 ++ vshuf.b vr1, vr1, vr1, vr7 ++L(calc_result): ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t3, a5 ++ maskeqz t1, t2, a5 ++ ++ or a0, t0, t1 ++ jr ra ++L(un_pair_end): ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ ++ ++ vnori.b vr7, vr8, 0 ++ vfrstpi.b vr7, vr7, 0 ++ vshuf.b vr0, vr2, vr2, vr7 ++ vshuf.b vr1, vr3, vr3, vr7 ++ ++ b L(calc_result) ++END(MEMCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c +new file mode 100644 +index 00000000..32eccac2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c +@@ -0,0 +1,43 @@ ++/* Multiple versions of memcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memcmp __redirect_memcmp ++# include <string.h> ++# undef memcmp ++ ++# define SYMBOL_NAME memcmp ++# include "ifunc-memcmp.h" ++ ++libc_ifunc_redirected (__redirect_memcmp, memcmp, ++ IFUNC_SELECTOR ()); ++# undef bcmp ++weak_alias (memcmp, bcmp) ++ ++# undef __memcmpeq ++strong_alias (memcmp, __memcmpeq) ++libc_hidden_def (__memcmpeq) ++ ++# ifdef SHARED ++__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +-- +2.33.0 +
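The memcmp.c wrapper added above relies on glibc-internal machinery (libc_ifunc_redirected, IFUNC_SELECTOR). Outside glibc, the same dispatch pattern can be written with the GCC/Clang ifunc attribute; the toy sketch below (my_memcmp and both implementations are hypothetical names, and the "SIMD" variant is only a stand-in) mirrors what the selector in ifunc-memcmp.h does: pick an implementation once, at load time, based on the reported hardware capabilities.

/* Toy illustration of ifunc dispatch using the GCC/Clang "ifunc"
   attribute instead of glibc's internal macros.  Requires an ELF
   target with GNU ifunc support (e.g. glibc).  */
#include <stddef.h>
#include <sys/auxv.h>

static int
my_memcmp_generic (const void *a, const void *b, size_t n)
{
  const unsigned char *p = a, *q = b;
  for (size_t i = 0; i < n; i++)
    if (p[i] != q[i])
      return p[i] - q[i];
  return 0;
}

/* Stand-in for a SIMD variant; a real one would use LSX/LASX code.  */
static int
my_memcmp_simd (const void *a, const void *b, size_t n)
{
  return my_memcmp_generic (a, b, n);
}

/* Resolver runs once at load time and returns the chosen implementation.  */
static int (*resolve_my_memcmp (void)) (const void *, const void *, size_t)
{
#ifdef HWCAP_LOONGARCH_LSX
  if (getauxval (AT_HWCAP) & HWCAP_LOONGARCH_LSX)
    return my_memcmp_simd;
#endif
  return my_memcmp_generic;
}

int my_memcmp (const void *a, const void *b, size_t n)
  __attribute__ ((ifunc ("resolve_my_memcmp")));

A real resolver has to stay self-contained because it runs before ordinary relocations are finished, which is part of why glibc's own selectors read GLRO(dl_hwcap) directly rather than calling out to getauxval.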
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-memrchr-lsx-lasx.patch
Added
@@ -0,0 +1,417 @@ +From c4c272fb8067364530a2a78df92c37403acc963f Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:37 +0800 +Subject: PATCH 16/29 LoongArch: Add ifunc support for memrchr{lsx, lasx} + +According to glibc memrchr microbenchmark, this implementation could reduce +the runtime as following: + +Name Percent of rutime reduced +memrchr-lasx 20%-83% +memrchr-lsx 20%-64% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 ++++++ + .../lp64/multiarch/memrchr-generic.c | 23 ++++ + .../loongarch/lp64/multiarch/memrchr-lasx.S | 123 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +++++ + 7 files changed, 335 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 2f4802cf..7b87bc90 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -27,5 +27,8 @@ sysdep_routines += \ + memchr-aligned \ + memchr-lsx \ + memchr-lasx \ ++ memrchr-generic \ ++ memrchr-lsx \ ++ memrchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index a567b9cf..8bd5489e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -109,5 +109,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) + ) ++ ++ IFUNC_IMPL (i, name, memrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +new file mode 100644 +index 00000000..8215f9ad +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memrchr implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (generic); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c +new file mode 100644 +index 00000000..ced61ebc +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c +@@ -0,0 +1,23 @@ ++/* Generic implementation of memrchr. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define MEMRCHR __memrchr_generic ++#endif ++ ++#include <string/memrchr.c> +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S +new file mode 100644 +index 00000000..5f3e0d06 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S +@@ -0,0 +1,123 @@ ++/* Optimized memrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMRCHR ++# define MEMRCHR __memrchr_lasx ++#endif ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x3f ++ ++ bstrins.d a3, zero, 5, 0 ++ addi.d t1, t1, 1 ++ xvld xr0, a3, 0 ++ xvld xr1, a3, 32 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ xvreplgr2vr.b xr2, a1 ++ andi t4, a0, 0x3f ++ ++ srl.d t2, t3, t2 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 5, 0 ++L(loop): ++ xvld xr0, a3, -64 ++ ++ xvld xr1, a3, -32 ++ addi.d a3, a3, -64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ ++ ++ beq a0, a3, L(out) ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +new file mode 100644 +index 00000000..39a7c8b0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +@@ -0,0 +1,105 @@ ++/* Optimized memrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMRCHR __memrchr_lsx ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x1f ++ ++ bstrins.d a3, zero, 4, 0 ++ addi.d t1, t1, 1 ++ vld vr0, a3, 0 ++ vld vr1, a3, 16 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ vreplgr2vr.b vr2, a1 ++ andi t4, a0, 0x1f ++ ++ srl.d t2, t3, t2 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 4, 0 ++L(loop): ++ vld vr0, a3, -32 ++ ++ vld vr1, a3, -16 ++ addi.d a3, a3, -32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ ++ beq a0, a3, L(out) ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c +new file mode 100644 +index 00000000..8baba9ab +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c +@@ -0,0 +1,33 @@ ++/* Multiple versions of memrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memrchr __redirect_memrchr ++# include <string.h> ++# undef memrchr ++ ++# define SYMBOL_NAME memrchr ++# include "ifunc-memrchr.h" ++ ++libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); ++libc_hidden_def (__memrchr) ++weak_alias (__memrchr, memrchr) ++ ++#endif +-- +2.33.0 +
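The SUPPORT_LSX / SUPPORT_LASX checks in ifunc-memrchr.h read hwcap bits that glibc caches at startup. An application-level equivalent, useful for seeing which variant the selector would return on a given machine, is a getauxval(AT_HWCAP) probe; this sketch assumes a Linux LoongArch kernel that exposes the HWCAP_LOONGARCH_* constants in <asm/hwcap.h>:

#include <stdio.h>
#include <sys/auxv.h>
#ifdef __loongarch__
# include <asm/hwcap.h>
#endif

int main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  const char *pick = "__memrchr_generic";
#ifdef HWCAP_LOONGARCH_LASX
  if (hwcap & HWCAP_LOONGARCH_LASX)
    pick = "__memrchr_lasx";
  else if (hwcap & HWCAP_LOONGARCH_LSX)
    pick = "__memrchr_lsx";
#else
  (void) hwcap;                 /* HWCAP_LOONGARCH_* not available here */
#endif
  printf ("ifunc selector would return %s\n", pick);
  return 0;
}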
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-memset-aligned-unali.patch
Added
@@ -0,0 +1,784 @@ +From 14032f7bbe18443af8492f5d0365f72b76701673 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:38 +0800 +Subject: PATCH 17/29 LoongArch: Add ifunc support for memset{aligned, + unaligned, lsx, lasx} + +According to glibc memset microbenchmark test results, for LSX and LASX +versions, A few cases with length less than 8 experience performace +degradation, overall, the LASX version could reduce the runtime about +15% - 75%, LSX version could reduce the runtime about 15%-50%. + +The unaligned version uses unaligned memmory access to set data which +length is less than 64 and make address aligned with 8. For this part, +the performace is better than aligned version. Comparing with the generic +version, the performance is close when the length is larger than 128. When +the length is 8-128, the unaligned version could reduce the runtime about +30%-70%, the aligned version could reduce the runtime about 20%-50%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 4 + + .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +++ + .../lp64/multiarch/ifunc-impl-list.c | 10 + + .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 ++++++++++++++ + .../lp64/multiarch/memset-unaligned.S | 162 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/memset.c | 37 ++++ + 8 files changed, 688 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 7b87bc90..216886c5 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -30,5 +30,9 @@ sysdep_routines += \ + memrchr-generic \ + memrchr-lsx \ + memrchr-lasx \ ++ memset-aligned \ ++ memset-unaligned \ ++ memset-lsx \ ++ memset-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +new file mode 100644 +index 00000000..e2723873 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -0,0 +1,24 @@ ++/* Symbol rediretion for loader/static initialization code. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _DL_IFUNC_GENERIC_H ++#define _DL_IFUNC_GENERIC_H ++ ++asm ("memset = __memset_aligned"); ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 8bd5489e..37f60dde 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -117,5 +117,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) + ) ++ ++ IFUNC_IMPL (i, name, memset, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +new file mode 100644 +index 00000000..1fce95b7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +@@ -0,0 +1,174 @@ ++/* Optimized memset aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMSET_NAME __memset_aligned ++#else ++# define MEMSET_NAME memset ++#endif ++ ++LEAF(MEMSET_NAME, 6) ++ move t0, a0 ++ andi a3, a0, 0x7 ++ li.w t6, 16 ++ beqz a3, L(align) ++ bltu a2, t6, L(short_data) ++ ++L(make_align): ++ li.w t8, 8 ++ sub.d t2, t8, a3 ++ pcaddi t1, 11 ++ slli.d t3, t2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++L(al7): ++ st.b a1, t0, 6 ++L(al6): ++ st.b a1, t0, 5 ++L(al5): ++ st.b a1, t0, 4 ++L(al4): ++ st.b a1, t0, 3 ++L(al3): ++ st.b a1, t0, 2 ++L(al2): ++ st.b a1, t0, 1 ++L(al1): ++ st.b a1, t0, 0 ++L(al0): ++ add.d t0, t0, t2 ++ sub.d a2, a2, t2 ++ ++L(align): ++ bstrins.d a1, a1, 15, 8 ++ bstrins.d a1, a1, 31, 16 ++ bstrins.d a1, a1, 63, 32 ++ bltu a2, t6, L(less_16bytes) ++ ++ andi a4, a2, 0x3f ++ beq a4, a2, L(less_64bytes) ++ ++ sub.d t1, a2, a4 ++ move a2, a4 ++ add.d a5, t0, t1 ++ ++L(loop_64bytes): ++ addi.d t0, t0, 64 ++ st.d a1, t0, -64 ++ st.d a1, t0, -56 ++ st.d a1, t0, -48 ++ st.d a1, t0, -40 ++ ++ st.d a1, t0, -32 ++ st.d a1, t0, -24 ++ st.d a1, t0, -16 ++ st.d a1, t0, -8 ++ bne t0, a5, L(loop_64bytes) ++ ++L(less_64bytes): ++ srai.d a4, a2, 5 ++ beqz a4, L(less_32bytes) ++ addi.d a2, a2, -32 ++ st.d a1, t0, 0 ++ ++ st.d a1, t0, 8 ++ st.d a1, t0, 16 ++ st.d a1, t0, 24 ++ addi.d t0, t0, 32 ++ ++L(less_32bytes): ++ bltu a2, t6, L(less_16bytes) ++ addi.d a2, a2, -16 ++ st.d a1, t0, 0 ++ st.d a1, t0, 8 ++ addi.d t0, t0, 16 ++ ++L(less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(less_8bytes) ++ addi.d a2, a2, -8 ++ st.d a1, t0, 0 ++ addi.d t0, t0, 8 ++ ++L(less_8bytes): ++ beqz a2, L(less_1byte) ++ srai.d a4, a2, 2 ++ beqz a4, L(less_4bytes) ++ addi.d a2, a2, -4 ++ st.w a1, t0, 0 ++ addi.d t0, t0, 4 ++ ++L(less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(less_2bytes) ++ addi.d a2, a2, -2 ++ st.h a1, t0, 0 ++ addi.d t0, t0, 2 ++ ++L(less_2bytes): ++ beqz a2, L(less_1byte) ++ st.b a1, t0, 0 ++L(less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 19 ++ slli.d t3, a2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++L(short_15): ++ st.b a1, a0, 14 ++L(short_14): ++ st.b a1, a0, 13 ++L(short_13): ++ st.b a1, a0, 12 ++L(short_12): ++ st.b a1, a0, 11 ++L(short_11): ++ st.b a1, a0, 10 ++L(short_10): ++ st.b a1, a0, 9 ++L(short_9): ++ st.b a1, a0, 8 ++L(short_8): ++ st.b a1, a0, 7 ++L(short_7): ++ st.b a1, a0, 6 ++L(short_6): ++ st.b a1, a0, 5 ++L(short_5): ++ st.b a1, a0, 4 ++L(short_4): ++ st.b a1, a0, 3 ++L(short_3): ++ st.b a1, a0, 2 ++L(short_2): ++ st.b a1, a0, 1 ++L(short_1): ++ st.b a1, a0, 0 ++L(short_0): ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +new file mode 100644 +index 00000000..041abbac +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +@@ -0,0 +1,142 @@ ++/* Optimized memset implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lasx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 32 ++ move a3, a0 ++ xvreplgr2vr.b xr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_32bytes) ++ li.d t3, 128 ++ li.d t2, 64 ++ blt t3, a2, L(long_bytes) ++ ++L(less_128bytes): ++ bgeu t2, a2, L(less_64bytes) ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a4, -32 ++ ++ xvst xr0, a4, -64 ++ jr ra ++L(less_64bytes): ++ xvst xr0, a3, 0 ++ xvst xr0, a4, -32 ++ ++ ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ beqz t0, L(less_16bytes) ++ vst vr0, a3, 0 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vstelm.d vr0, a3, 0, 0 ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ ++ beqz t0, L(less_4bytes) ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a3, 0 ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ xvst xr0, a3, 0 ++ bstrins.d a3, zero, 4, 0 ++ addi.d a3, a3, 32 ++ sub.d a2, a4, a3 ++ ++ andi t0, a2, 0xff ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++ ++L(loop_256): ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ xvst xr0, a3, 128 ++ xvst xr0, a3, 160 ++ xvst xr0, a3, 192 ++ xvst xr0, a3, 224 ++ ++ addi.d a3, a3, 256 ++ bne a3, t0, L(loop_256) ++L(long_end): ++ bltu a2, t3, L(end_less_128) ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ ++ addi.d a3, a3, 128 ++L(end_less_128): ++ bltu a2, t2, L(end_less_64) ++ addi.d a2, a2, -64 ++ xvst xr0, a3, 0 ++ ++ xvst xr0, a3, 32 ++ addi.d a3, a3, 64 ++L(end_less_64): ++ bltu a2, t1, L(end_less_32) ++ xvst xr0, a3, 0 ++ ++L(end_less_32): ++ xvst xr0, a4, -32 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +new file mode 100644 +index 00000000..3d3982aa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +@@ -0,0 +1,135 @@ ++/* Optimized memset implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lsx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 16 ++ move a3, a0 ++ vreplgr2vr.b vr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_16bytes) ++ li.d t3, 64 ++ li.d t2, 32 ++ bgeu a2, t3, L(long_bytes) ++ ++L(less_64bytes): ++ bgeu t2, a2, L(less_32bytes) ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ vst vr0, a4, -32 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_32bytes): ++ vst vr0, a3, 0 ++ vst vr0, a4, -16 ++ ++ ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ vstelm.d vr0, a3, 0, 0 ++ ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ jr ra ++ ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ vstelm.b vr0, a3, 0, 0 ++L(less_1bytes): ++ jr ra ++L(long_bytes): ++ vst vr0, a3, 0 ++ ++ bstrins.d a3, zero, 3, 0 ++ addi.d a3, a3, 16 ++ sub.d a2, a4, a3 ++ andi t0, a2, 0x7f ++ ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++L(loop_128): ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ vst vr0, a3, 64 ++ ++ ++ vst vr0, a3, 80 ++ vst vr0, a3, 96 ++ vst vr0, a3, 112 ++ addi.d a3, a3, 128 ++ ++ bne a3, t0, L(loop_128) ++L(long_end): ++ bltu a2, t3, L(end_less_64) ++ addi.d a2, a2, -64 ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ addi.d a3, a3, 64 ++ ++L(end_less_64): ++ bltu a2, t2, L(end_less_32) ++ addi.d a2, a2, -32 ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ ++ addi.d a3, a3, 32 ++L(end_less_32): ++ bltu a2, t1, L(end_less_16) ++ vst vr0, a3, 0 ++ ++L(end_less_16): ++ vst vr0, a4, -16 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +new file mode 100644 +index 00000000..f7d32039 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +@@ -0,0 +1,162 @@ ++/* Optimized memset unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMSET_NAME __memset_unaligned ++ ++#define ST_128(n) \ ++ st.d a1, a0, n; \ ++ st.d a1, a0, n+8 ; \ ++ st.d a1, a0, n+16 ; \ ++ st.d a1, a0, n+24 ; \ ++ st.d a1, a0, n+32 ; \ ++ st.d a1, a0, n+40 ; \ ++ st.d a1, a0, n+48 ; \ ++ st.d a1, a0, n+56 ; \ ++ st.d a1, a0, n+64 ; \ ++ st.d a1, a0, n+72 ; \ ++ st.d a1, a0, n+80 ; \ ++ st.d a1, a0, n+88 ; \ ++ st.d a1, a0, n+96 ; \ ++ st.d a1, a0, n+104; \ ++ st.d a1, a0, n+112; \ ++ st.d a1, a0, n+120; ++ ++LEAF(MEMSET_NAME, 6) ++ bstrins.d a1, a1, 15, 8 ++ add.d t7, a0, a2 ++ bstrins.d a1, a1, 31, 16 ++ move t0, a0 ++ ++ bstrins.d a1, a1, 63, 32 ++ srai.d t8, a2, 4 ++ beqz t8, L(less_16bytes) ++ srai.d t8, a2, 6 ++ ++ bnez t8, L(more_64bytes) ++ srai.d t8, a2, 5 ++ beqz t8, L(less_32bytes) ++ ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, a0, 16 ++ st.d a1, a0, 24 ++ ++ st.d a1, t7, -32 ++ st.d a1, t7, -24 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d t8, a2, 3 ++ beqz t8, L(less_8bytes) ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d t8, a2, 2 ++ beqz t8, L(less_4bytes) ++ st.w a1, a0, 0 ++ st.w a1, t7, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d t8, a2, 1 ++ beqz t8, L(less_2bytes) ++ st.h a1, a0, 0 ++ st.h a1, t7, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ addi.d a0, a0, 0x8 ++ st.d a1, t0, 0 ++ ++ sub.d t2, t0, a0 ++ add.d a2, t2, a2 ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ ST_128(0) ++ addi.d a0, a0, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ pcaddi t1, 20 ++ andi t5, a2, 0x78 ++ srli.d t5, t5, 1 ++ ++ sub.d t1, t1, t5 ++ jr t1 ++ ++ st.d a1, a0, 112 ++ st.d a1, a0, 104 ++ st.d a1, a0, 96 ++ st.d a1, a0, 88 ++ st.d a1, a0, 80 ++ st.d a1, a0, 72 ++ st.d a1, a0, 64 ++ st.d a1, a0, 56 ++ st.d a1, a0, 48 ++ st.d a1, a0, 40 ++ st.d a1, a0, 32 ++ st.d a1, a0, 24 ++ st.d a1, a0, 16 ++ st.d a1, a0, 8 ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ move a0, t0 ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c +new file mode 100644 +index 00000000..3ff60d8a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memset. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memset __redirect_memset ++# include <string.h> ++# undef memset ++ ++# define SYMBOL_NAME memset ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memset, memset, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memset, __GI_memset, __redirect_memset) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); ++# endif ++ ++#endif +-- +2.33.0 +
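dl-symbol-redir-ifunc.h pins memset to __memset_aligned with a toplevel asm alias so that loader and static-initialization code never depends on an ifunc having been resolved. The GCC alias attribute gives an analogous, self-contained illustration of one symbol naming another function's code; the function names here are invented for the example and do not appear in the patch:

#include <stdio.h>

int times_two (int x) { return 2 * x; }

/* Make `doubled' a second name for the same code at the symbol level --
   conceptually what `asm ("memset = __memset_aligned")' does for memset
   in code that must not rely on ifunc resolution having happened yet.  */
extern int doubled (int) __attribute__ ((alias ("times_two")));

int main (void)
{
  printf ("%d\n", doubled (21));        /* prints 42 */
  return 0;
}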
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch
Added
@@ -0,0 +1,448 @@ +From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:35 +0800 +Subject: PATCH 14/29 LoongArch: Add ifunc support for rawmemchr{aligned, + lsx, lasx} + +According to glibc rawmemchr microbenchmark, A few cases tested with +char '\0' experience performance degradation due to the lasx and lsx +versions don't handle the '\0' separately. Overall, rawmemchr-lasx +implementation could reduce the runtime about 40%-80%, rawmemchr-lsx +implementation could reduce the runtime about 40%-66%, rawmemchr-aligned +implementation could reduce the runtime about 20%-40%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++ + .../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++ + .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++ + .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++ + sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++ + 7 files changed, 365 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 5d7ae7ae..64416b02 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -21,5 +21,8 @@ sysdep_routines += \ + memmove-unaligned \ + memmove-lsx \ + memmove-lasx \ ++ rawmemchr-aligned \ ++ rawmemchr-lsx \ ++ rawmemchr-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index c8ba87bd..3db9af14 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) + ) + ++ IFUNC_IMPL (i, name, rawmemchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +new file mode 100644 +index 00000000..a7bb4cf9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for rawmemchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +new file mode 100644 +index 00000000..9c7155ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +@@ -0,0 +1,124 @@ ++/* Optimized rawmemchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define RAWMEMCHR_NAME __rawmemchr_aligned ++#else ++# define RAWMEMCHR_NAME __rawmemchr ++#endif ++ ++LEAF(RAWMEMCHR_NAME, 6) ++ andi t1, a0, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ bstrins.d a1, a1, 15, 8 ++ ++ ld.d t0, a0, 0 ++ slli.d t1, t1, 3 ++ ori a2, a2, 0x101 ++ bstrins.d a1, a1, 31, 16 ++ ++ li.w t8, -1 ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a2, a2, 63, 32 ++ sll.d t2, t8, t1 ++ ++ sll.d t3, a1, t1 ++ orn t0, t0, t2 ++ slli.d a3, a2, 7 ++ beqz a1, L(find_zero) ++ ++ xor t0, t0, t3 ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop): ++ ld.d t0, a0, 0 ++ xor t0, t0, a1 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ xor t0, t0, a1 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t0, t3 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(loop_7bit): ++ ld.d t0, a0, 0 ++L(find_zero): ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ bnez t2, L(more_check) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ ++ beqz t2, L(loop_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop_8bit): ++ ld.d t0, a0, 0 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop_8bit) ++ ++ addi.d a0, a0, -8 ++ b L(count_pos) ++ ++END(RAWMEMCHR_NAME) ++ ++libc_hidden_builtin_def (__rawmemchr) +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +new file mode 100644 +index 00000000..be2eb59d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +@@ -0,0 +1,82 @@ ++/* Optimized rawmemchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/asm.h> ++#include <sys/regdef.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lasx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ xvreplgr2vr.b xr2, a1 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a2 ++ ++ ++ beqz t0, L(loop) ++ ctz.d t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ ctz.d t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +new file mode 100644 +index 00000000..2f6fe024 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized rawmemchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lsx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ vreplgr2vr.b vr2, a1 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a2 ++ ++ beqz t0, L(loop) ++ ctz.w t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ctz.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +new file mode 100644 +index 00000000..89c7ffff +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of rawmemchr. ++ All versions must be listed in ifunc-impl-list.c. 
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define rawmemchr __redirect_rawmemchr ++# define __rawmemchr __redirect___rawmemchr ++# include <string.h> ++# undef rawmemchr ++# undef __rawmemchr ++ ++# define SYMBOL_NAME rawmemchr ++# include "ifunc-rawmemchr.h" ++ ++libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, ++ IFUNC_SELECTOR ()); ++weak_alias (__rawmemchr, rawmemchr) ++# ifdef SHARED ++__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) ++ __attribute__((visibility ("hidden"))); ++# endif ++#endif +-- +2.33.0 +
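As a reference for what the aligned, LSX and LASX variants above must compute: rawmemchr(s, c) returns a pointer to the first byte equal to c with no length limit, so passing '\0' behaves like s + strlen(s). A minimal portable C rendering, under an illustrative name rather than glibc's internal symbols, is:

#include <stdio.h>

/* Find the first byte equal to c; the caller guarantees it exists.  */
static void *
ref_rawmemchr (const void *s, int c)
{
  const unsigned char *p = s;
  while (*p != (unsigned char) c)
    p++;
  return (void *) p;
}

int main (void)
{
  const char *s = "loongarch";
  printf ("%s\n", (char *) ref_rawmemchr (s, 'a'));   /* prints "arch" */
  return 0;
}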
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strcmp-aligned-lsx.patch
Added
@@ -0,0 +1,499 @@ +From e258cfcf92f5e31e902fa045b41652f00fcf2521 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:18 +0800 +Subject: PATCH 09/29 LoongArch: Add ifunc support for strcmp{aligned, lsx} + +Based on the glibc microbenchmark, strcmp-aligned implementation could +reduce the runtime 0%-10% for aligned comparison, 10%-20% for unaligned +comparison, strcmp-lsx implemenation could reduce the runtime 0%-50%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 2 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 ++++ + .../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 ++++ + 6 files changed, 426 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index c4dd3143..d5a500de 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -12,6 +12,8 @@ sysdep_routines += \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ ++ strcmp-aligned \ ++ strcmp-lsx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 7cec0b77..9183b7da 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -62,6 +62,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) + ) + ++ IFUNC_IMPL (i, name, strcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +new file mode 100644 +index 00000000..ca26352b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strcmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +new file mode 100644 +index 00000000..f5f4f336 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -0,0 +1,179 @@ ++/* Optimized strcmp implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCMP_NAME __strcmp_aligned ++#else ++# define STRCMP_NAME strcmp ++#endif ++ ++LEAF(STRCMP_NAME, 6) ++ lu12i.w a4, 0x01010 ++ andi a2, a0, 0x7 ++ ori a4, a4, 0x101 ++ andi a3, a1, 0x7 ++ ++ bstrins.d a4, a4, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ slli.d a5, a4, 7 ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ++ ld.d t1, a1, 0 ++ slli.d t3, a2, 3 ++ sll.d t2, t7, t3 ++ orn t0, t0, t2 ++ ++ ++ orn t1, t1, t2 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ bne t0, t1, L(al_end) ++L(al_loop): ++ bnez t2, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ addi.d t8, t8, 8 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ beq t0, t1, L(al_loop) ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t3, t2 ++ ++ ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ ++ andi t1, t1, 0xff ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++L(unaligned): ++ slt a6, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a6 ++ xor a0, a0, t0 ++ ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0x7 ++ andi a3, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d a2, a2, 3 ++ ++ slli.d a3, a3, 3 ++ srl.d t0, t4, a2 ++ srl.d t1, t1, a3 ++ srl.d t5, t7, a3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sll.d t5, t7, a2 ++ ++ ++ sub.d a3, a2, a3 ++ orn t4, t4, t5 ++ sub.d a2, zero, a3 ++ sub.d t2, t4, a4 ++ ++ andn t3, a5, t4 ++ and t2, t2, t3 ++ bnez t2, L(find_zero) ++L(un_loop): ++ srl.d t5, t4, a3 ++ ++ ldx.d t4, a0, t8 ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a2 ++ ++ 
or t0, t0, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t2, t4, a4 ++ andn t3, a5, t4 ++ ++ ++ and t2, t2, t3 ++ beqz t2, L(un_loop) ++L(find_zero): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ ++ and t2, t2, t3 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ srl.d t0, t4, a3 ++ ++L(not_equal): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ xor t3, t0, t1 ++ ++ or t2, t2, t3 ++L(un_end): ++ ctz.d t3, t2 ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ ++ ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ ++ sub.d t3, t1, t0 ++ masknez t0, t2, a6 ++ maskeqz t1, t3, a6 ++ or a0, t0, t1 ++ ++ jr ra ++END(STRCMP_NAME) ++ ++libc_hidden_builtin_def (STRCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +new file mode 100644 +index 00000000..2e177a38 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -0,0 +1,165 @@ ++/* Optimized strcmp implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRCMP __strcmp_lsx ++ ++LEAF(STRCMP, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a2, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ andi a3, a1, 0xf ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ bstrins.d a1, zero, 3, 0 ++ vld vr0, a0, 0 ++ ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr3, a2 ++ vslt.b vr2, vr2, vr3 ++ vseq.b vr3, vr0, vr1 ++ ++ vmin.bu vr3, vr0, vr3 ++ vor.v vr3, vr3, vr2 ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(al_out) ++ ++ ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++ ++L(al_out): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(unaligned): ++ slt a4, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a4 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0xf ++ andi a3, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ bstrins.d a1, zero, 3, 0 ++ vld vr3, a0, 0 ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr4, a2 ++ ++ vreplgr2vr.b vr5, a3 ++ vslt.b vr7, vr2, vr5 ++ vsub.b vr5, vr5, vr4 ++ vaddi.bu vr6, vr2, 16 ++ ++ ++ vsub.b vr6, vr6, vr5 ++ vshuf.b vr0, vr3, vr3, vr6 ++ vor.v vr0, vr0, vr7 ++ vor.v vr1, vr1, vr7 ++ ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ vslt.b vr4, vr2, vr4 ++ ++ vor.v vr0, vr3, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(find_zero) ++ nop ++ ++L(un_loop): ++ vld vr3, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ ++ vshuf.b vr0, vr3, vr0, vr6 ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ ++ vsetanyeqz.b fcc0, vr3 ++ vor.v vr0, vr3, vr3 ++ bceqz fcc0, L(un_loop) ++L(find_zero): ++ vmin.bu vr5, vr1, vr5 ++ ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(ret0) ++ vld vr1, a1, 16 ++ vshuf.b vr0, vr3, vr3, vr6 ++ ++ vseq.b vr5, vr0, vr1 ++L(not_equal): ++ vmin.bu vr5, vr0, vr5 ++L(un_end): ++ vseqi.b vr5, vr5, 0 ++ vfrstpi.b vr5, vr5, 0 ++ ++ ++ vshuf.b vr0, vr0, vr0, vr5 ++ vshuf.b vr1, vr1, vr1, vr5 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t3, t0, t1 ++ sub.d t4, t1, t0 ++ masknez t0, t3, a4 ++ maskeqz t1, t4, a4 ++ ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c +new file mode 100644 +index 00000000..6f249c0b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcmp __redirect_strcmp ++# include <string.h> ++# undef strcmp ++ ++# define SYMBOL_NAME strcmp ++# include "ifunc-strcmp.h" ++ ++libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp); ++# endif ++#endif +-- +2.33.0 +
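The strcmp patch above routes callers through libc_ifunc_redirected, so the choice between __strcmp_aligned and __strcmp_lsx is made once, when the dynamic loader resolves the symbol. The small C program below is only a sketch of that mechanism using GCC's ifunc attribute: the function names and the hwcap bit are assumptions made for illustration, not code from the patch, and an application-level resolver can probe getauxval, which is not how glibc itself reads hwcap.

/* Sketch of IFUNC dispatch; names and the hwcap bit are assumed for
   illustration and are not taken from the patch above.  */
#include <string.h>
#include <sys/auxv.h>

typedef int strcmp_fn (const char *, const char *);

/* Stand-ins for __strcmp_aligned and __strcmp_lsx.  */
static int
strcmp_generic (const char *a, const char *b)
{
  return strcmp (a, b);
}

static int
strcmp_simd (const char *a, const char *b)
{
  return strcmp (a, b);
}

/* Assumed LSX bit; the authoritative value is in the kernel's hwcap header.  */
#define HWCAP_LSX_ASSUMED (1UL << 4)

/* The resolver runs once, while relocations are processed, and returns the
   implementation that every later call will use.  */
static strcmp_fn *
resolve_my_strcmp (void)
{
  return (getauxval (AT_HWCAP) & HWCAP_LSX_ASSUMED)
         ? strcmp_simd : strcmp_generic;
}

int my_strcmp (const char *, const char *)
  __attribute__ ((ifunc ("resolve_my_strcmp")));

int
main (void)
{
  return my_strcmp ("abc", "abd") < 0 ? 0 : 1;
}

Once resolved, my_strcmp behaves like strcmp with the body picked at startup; the SUPPORT_LSX check in the ifunc selector headers of these patches makes the same shape of decision inside glibc.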
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strcpy-stpcpy-aligne.patch
Added
@@ -0,0 +1,1099 @@ +From 351086591d938aaf884d475261ae96ec5da00384 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:34:59 +0800 +Subject: PATCH 22/29 LoongArch: Add ifunc support for strcpy, + stpcpy{aligned, unaligned, lsx, lasx} + +According to glibc strcpy and stpcpy microbenchmark test results(changed +to use generic_strcpy and generic_stpcpy instead of strlen + memcpy), +comparing with the generic version, this implementation could reduce the +runtime as following: + +Name Percent of rutime reduced +strcpy-aligned 8%-45% +strcpy-unaligned 8%-48%, comparing with the aligned version, unaligned + version takes less instructions to copy the tail of data + which length is less than 8. it also has better performance + in case src and dest cannot be both aligned with 8bytes +strcpy-lsx 20%-80% +strcpy-lasx 15%-86% +stpcpy-aligned 6%-43% +stpcpy-unaligned 8%-48% +stpcpy-lsx 10%-80% +stpcpy-lasx 10%-87% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 8 + + .../lp64/multiarch/ifunc-impl-list.c | 18 ++ + .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 +++ + .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 ++ + .../lp64/multiarch/stpcpy-unaligned.S | 22 ++ + sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 ++++ + .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 ++++++++++++++++ + .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 ++++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++++++++++++++ + .../lp64/multiarch/strcpy-unaligned.S | 138 +++++++++++ + sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 +++ + 12 files changed, 963 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 360a6718..39550bea 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -16,6 +16,14 @@ sysdep_routines += \ + strcmp-lsx \ + strncmp-aligned \ + strncmp-lsx \ ++ strcpy-aligned \ ++ strcpy-unaligned \ ++ strcpy-lsx \ ++ strcpy-lasx \ ++ stpcpy-aligned \ ++ stpcpy-unaligned \ ++ stpcpy-lsx \ ++ stpcpy-lasx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index e397d58c..39a14f1d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) + ) + ++ IFUNC_IMPL (i, name, strcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, 
__strcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, stpcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +new file mode 100644 +index 00000000..1f763db6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +@@ -0,0 +1,27 @@ ++/* stpcpy-aligned implementation is in strcpy-aligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#if IS_IN (libc) ++# define STPCPY __stpcpy_aligned ++#else ++# define STPCPY __stpcpy ++#endif ++ ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-aligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +new file mode 100644 +index 00000000..13d6c953 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lasx implementation is in strcpy-lasx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lasx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +new file mode 100644 +index 00000000..e0f17ab5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lsx implementation is in strcpy-lsx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_lsx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +new file mode 100644 +index 00000000..cc2f9712 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +@@ -0,0 +1,22 @@ ++/* stpcpy-unaligned implementation is in strcpy-unaligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STPCPY __stpcpy_unaligned ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-unaligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +new file mode 100644 +index 00000000..d4860d7a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +@@ -0,0 +1,42 @@ ++/* Multiple versions of stpcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define stpcpy __redirect_stpcpy ++# define __stpcpy __redirect___stpcpy ++# define NO_MEMPCPY_STPCPY_REDIRECT ++# define __NO_STRING_INLINES ++# include <string.h> ++# undef stpcpy ++# undef __stpcpy ++ ++# define SYMBOL_NAME stpcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); ++ ++weak_alias (__stpcpy, stpcpy) ++# ifdef SHARED ++__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +new file mode 100644 +index 00000000..4ed539fd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +@@ -0,0 +1,202 @@ ++/* Optimized strcpy stpcpy aligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# ifndef STRCPY ++# define STRCPY __strcpy_aligned ++# endif ++#else ++# ifndef STRCPY ++# define STRCPY strcpy ++# endif ++#endif ++ ++LEAF(STRCPY, 6) ++ andi a3, a0, 0x7 ++ move a2, a0 ++ beqz a3, L(dest_align) ++ sub.d a5, a1, a3 ++ addi.d a5, a5, 8 ++ ++L(make_dest_align): ++ ld.b t0, a1, 0 ++ addi.d a1, a1, 1 ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++ beqz t0, L(al_out) ++ ++ bne a1, a5, L(make_dest_align) ++ ++L(dest_align): ++ andi a4, a1, 7 ++ bstrins.d a1, zero, 2, 0 ++ ++ lu12i.w t5, 0x1010 ++ ld.d t0, a1, 0 ++ ori t5, t5, 0x101 ++ bstrins.d t5, t5, 63, 32 ++ ++ slli.d t6, t5, 0x7 ++ bnez a4, L(unalign) ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ bnez t3, L(al_end) ++ ++L(al_loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ ++ addi.d a1, a1, 8 ++ addi.d a2, a2, 8 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ beqz t3, L(al_loop) ++ ++L(al_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++ andi a3, t1, 8 ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++ ++L(al_end_8): ++ beqz a3, L(al_end_4) ++ st.d t0, a2, 0 ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, 7 ++#endif ++ jr ra ++L(al_end_4): ++ beqz a4, L(al_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(al_end_2): ++ beqz a5, L(al_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(al_end_1): ++ beqz a6, L(al_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(al_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++ ++ .align 4 ++L(unalign): ++ slli.d a5, a4, 3 ++ li.d t1, -1 ++ sub.d a6, zero, a5 ++ ++ srl.d a7, t0, a5 ++ sll.d t7, t1, a6 ++ ++ or t0, a7, t7 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(un_end) ++ ++ ld.d t4, a1, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ bnez t3, L(un_end_with_remaining) ++ ++L(un_loop): ++ srl.d a7, t4, a5 ++ ++ ld.d t4, a1, 16 ++ addi.d a1, a1, 8 ++ ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ beqz t3, L(un_loop) ++ ++L(un_end_with_remaining): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ sub.d t1, t1, a4 ++ ++ blt t1, zero, L(un_end_less_8) ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ beqz t1, L(un_out) ++ srl.d t0, t4, a5 ++ b L(un_end_less_8) ++ ++L(un_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++L(un_end_less_8): ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++L(un_end_4): ++ beqz a4, L(un_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(un_end_2): ++ beqz a5, L(un_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(un_end_1): ++ beqz a6, L(un_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(un_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +new file mode 100644 +index 00000000..c2825612 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +@@ -0,0 +1,215 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lasx ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ ori t8, zero, 0xfe0 ++ andi t0, a1, 0xfff ++ li.d t7, -1 ++ move a2, a0 ++ ++ bltu t8, t0, L(page_cross_start) ++L(start_entry): ++ xvld xr0, a1, 0 ++ li.d t0, 32 ++ andi t1, a2, 0x1f ++ ++ xvsetanyeqz.b fcc0, xr0 ++ sub.d t0, t0, t1 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ ++ xvst xr0, a2, 0 ++ andi a3, a1, 0x1f ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ ++ ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ xvst xr0, a2, 0 ++ ++ xvld xr0, a1, 32 ++ addi.d a2, a2, 32 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++ nop ++ ++L(page_cross_start): ++ move a4, a1 ++ bstrins.d a4, zero, 4, 0 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ ++ beq t0, t7, L(start_entry) ++ b L(tail) ++L(unaligned): ++ andi t0, a1, 0xfff ++ bltu t8, t0, L(un_page_cross) ++ ++ ++L(un_start_entry): ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(un_end) ++ addi.d a1, a1, 32 ++ ++L(un_loop): ++ xvst xr0, a2, 0 ++ andi t0, a1, 0xfff ++ addi.d a2, a2, 32 ++ bltu t8, t0, L(page_cross_loop) ++ ++L(un_loop_entry): ++ xvld xr0, a1, 0 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(un_loop) ++ ++ addi.d a1, a1, -32 ++L(un_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++L(un_tail): ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++L(un_page_cross): ++ sub.d a4, a1, a3 ++ ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_start_entry) ++ b L(un_tail) ++ ++ ++L(page_cross_loop): ++ sub.d a4, a1, a3 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_loop_entry) ++ ++ b L(un_tail) ++L(end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++L(tail): ++ cto.w t0, t0 ++ add.d dstend, a2, t0 ++ add.d a5, a1, t0 ++ ++L(less_32): ++ srli.d t1, t0, 4 ++ beqz t1, L(less_16) ++ vld vr0, a1, 0 ++ vld vr1, a5, -15 ++ ++ vst vr0, a2, 0 ++ vst vr1, 
dstend, -15 ++ jr ra ++L(less_16): ++ srli.d t1, t0, 3 ++ ++ beqz t1, L(less_8) ++ ld.d t2, a1, 0 ++ ld.d t3, a5, -7 ++ st.d t2, a2, 0 ++ ++ st.d t3, dstend, -7 ++ jr ra ++L(less_8): ++ li.d t1, 3 ++ bltu t0, t1, L(less_3) ++ ++ ld.w t2, a1, 0 ++ ld.w t3, a5, -3 ++ st.w t2, a2, 0 ++ st.w t3, dstend, -3 ++ ++ jr ra ++L(less_3): ++ beqz t0, L(zero_byte) ++ ld.h t2, a1, 0 ++ ++ st.h t2, a2, 0 ++L(zero_byte): ++ st.b zero, dstend, 0 ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +new file mode 100644 +index 00000000..fc2498f7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +@@ -0,0 +1,212 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lsx ++# endif ++ ++LEAF(STRCPY, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a4, a1, 0xf ++ vld vr1, t0, %pc_lo12(L(INDEX)) ++ move a2, a0 ++ ++ beqz a4, L(load_start) ++ xor t0, a1, a4 ++ vld vr0, t0, 0 ++ vreplgr2vr.b vr2, a4 ++ ++ vadd.b vr2, vr2, vr1 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(end) ++ ++L(load_start): ++ vld vr0, a1, 0 ++ li.d t1, 16 ++ andi a3, a2, 0xf ++ vsetanyeqz.b fcc0, vr0 ++ ++ ++ sub.d t0, t1, a3 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ vst vr0, a2, 0 ++ ++ andi a3, a1, 0xf ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ vld vr0, a1, 0 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ vst vr0, a2, 0 ++ vld vr0, a1, 16 ++ ++ addi.d a2, a2, 16 ++ addi.d a1, a1, 16 ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(al_loop) ++ ++ ++L(al_end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, -15 ++# ifdef USE_AS_STPCPY ++ add.d a0, a2, t0 ++ vst vr0, a0, -15 ++# else ++ add.d a2, a2, t0 ++ vst vr0, a2, -15 ++# endif ++ jr ra ++ ++L(end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ addi.d t0, t0, 1 ++ ++L(end_16): ++ andi t1, t0, 16 ++ beqz t1, L(end_8) ++ vst vr0, a2, 0 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 15 ++# endif ++ jr ra ++ ++L(end_8): ++ andi t2, t0, 8 ++ andi t3, t0, 4 ++ andi t4, t0, 2 ++ andi t5, t0, 1 ++ ++ beqz t2, L(end_4) ++ vstelm.d vr0, a2, 0, 0 ++ addi.d a2, a2, 8 ++ vbsrl.v vr0, vr0, 8 ++ ++L(end_4): ++ beqz t3, L(end_2) ++ vstelm.w vr0, a2, 0, 0 ++ addi.d a2, a2, 4 ++ vbsrl.v vr0, vr0, 4 ++ ++L(end_2): ++ beqz t4, L(end_1) ++ vstelm.h vr0, a2, 0, 0 ++ addi.d a2, a2, 2 ++ vbsrl.v vr0, vr0, 2 ++ ++ ++L(end_1): ++ beqz t5, L(out) ++ vstelm.b vr0, a2, 0, 0 ++ addi.d a2, a2, 1 
++L(out): ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++# endif ++ jr ra ++ ++ .align 4 ++L(unaligned): ++ bstrins.d a1, zero, 3, 0 ++ vld vr2, a1, 0 ++ vreplgr2vr.b vr3, a3 ++ vslt.b vr4, vr1, vr3 ++ ++ vor.v vr0, vr2, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_first_end) ++ vld vr0, a1, 16 ++ ++ vadd.b vr3, vr3, vr1 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ ++ vor.v vr2, vr0, vr0 ++ addi.d a1, a1, 16 ++L(un_loop): ++ vld vr0, a1, 16 ++ vst vr4, a2, 0 ++ ++ addi.d a2, a2, 16 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ vld vr2, a1, 32 ++ vst vr4, a2, 0 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, 16 ++ ++ vshuf.b vr4, vr2, vr0, vr3 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(un_loop) ++ vor.v vr0, vr2, vr2 ++ ++ ++ addi.d a1, a1, -16 ++L(un_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, 1f ++ vst vr4, a2, 0 ++ ++1: ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, 1 ++ add.d a2, a2, t0 ++ sub.d a2, a2, a3 ++ vst vr0, a2, 1 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 16 ++# endif ++ jr ra ++L(un_first_end): ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ b 1b ++END(STRCPY) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +new file mode 100644 +index 00000000..9e31883b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +@@ -0,0 +1,138 @@ ++/* Optimized strcpy unaligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_unaligned ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ lu12i.w t5, 0x01010 ++ li.w t0, 0xff8 ++ ori t5, t5, 0x101 ++ andi t1, a1, 0xfff ++ ++ bstrins.d t5, t5, 63, 32 ++ move a2, a0 ++ slli.d t6, t5, 7 ++ bltu t0, t1, L(page_cross) ++ ++L(start_entry): ++ ld.d t0, a1, 0 ++ li.d t3, 8 ++ andi a3, a1, 0x7 ++ sub.d t1, t0, t5 ++ ++ andn t2, t6, t0 ++ sub.d t3, t3, a3 ++ and t1, t1, t2 ++ bnez t1, L(end) ++ ++ ++ add.d a1, a1, t3 ++ st.d t0, a2, 0 ++ add.d a2, a2, t3 ++ ld.d t0, a1, 0 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ bnez t1, L(long_end) ++ ++L(loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ addi.d a2, a2, 8 ++ addi.d a1, a1, 8 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(loop) ++ ++ ++L(long_end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a1, a1, t1 ++ ld.d t0, a1, -7 ++ ++ add.d dstend, a2, t1 ++ st.d t0, dstend, -7 ++ jr ra ++ nop ++ ++L(end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a3, a1, t1 ++ add.d dstend, a2, t1 ++ ++L(less_8): ++ li.d t0, 3 ++ bltu t1, t0, L(less_3) ++ ld.w t1, a1, 0 ++ ld.w t2, a3, -3 ++ ++ ++ st.w t1, a2, 0 ++ st.w t2, dstend, -3 ++ jr ra ++L(less_3): ++ beqz t1, L(zero_bytes) ++ ++ ld.h t1, a1, 0 ++ st.h t1, a2, 0 ++L(zero_bytes): ++ st.b zero, dstend, 0 ++ jr ra ++ ++L(page_cross): ++ move a4, a1 ++ bstrins.d a4, zero, 2, 0 ++ ld.d t0, a4, 0 ++ li.d t3, -1 ++ ++ slli.d t4, a1, 3 ++ srl.d t3, t3, t4 ++ srl.d t0, t0, t4 ++ orn t0, t0, t3 ++ ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(start_entry) ++ ++ b L(end) ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c +new file mode 100644 +index 00000000..46afd068 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcpy __redirect_strcpy ++# include <string.h> ++# undef strcpy ++ ++# define SYMBOL_NAME strcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); ++# endif ++#endif +-- +2.33.0 +
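The commit message above benchmarks the new strcpy/stpcpy variants against generic versions rather than the old strlen + memcpy composition; whichever baseline is used, every variant has to keep the semantics sketched below (plain C, simplified, not the glibc generic code). The only observable difference between the two entry points is the return value, which is why the patch builds stpcpy from the strcpy bodies under USE_AS_STPCPY.

/* Reference semantics only; simplified, not the glibc generic code.  */
#include <string.h>

char *
ref_strcpy (char *dst, const char *src)
{
  size_t n = strlen (src) + 1;   /* copy the terminating NUL as well */
  memcpy (dst, src, n);
  return dst;                    /* strcpy returns the destination */
}

char *
ref_stpcpy (char *dst, const char *src)
{
  size_t n = strlen (src);
  memcpy (dst, src, n + 1);
  return dst + n;                /* stpcpy returns a pointer to the copied NUL */
}

In the assembly above, this difference shows up only in the USE_AS_STPCPY blocks that adjust a0 before the final jr ra.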
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strncmp-aligned-lsx.patch
Added
@@ -0,0 +1,583 @@ +From 6f03da2d7ef218c0f78375cf706dada59c3fee63 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:19 +0800 +Subject: PATCH 10/29 LoongArch: Add ifunc support for strncmp{aligned, lsx} + +Based on the glibc microbenchmark, only a few short inputs with this +strncmp-aligned and strncmp-lsx implementation experience performance +degradation, overall, strncmp-aligned could reduce the runtime 0%-10% +for aligned comparision, 10%-25% for unaligend comparision, strncmp-lsx +could reduce the runtime about 0%-60%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 2 + + .../lp64/multiarch/ifunc-impl-list.c | 7 + + .../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +++ + .../lp64/multiarch/strncmp-aligned.S | 218 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 +++ + 6 files changed, 508 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index d5a500de..5d7ae7ae 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -14,6 +14,8 @@ sysdep_routines += \ + strchrnul-lasx \ + strcmp-aligned \ + strcmp-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-unaligned \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 9183b7da..c8ba87bd 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) + ) + ++ IFUNC_IMPL (i, name, strncmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +new file mode 100644 +index 00000000..1a7dc36b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strncmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +new file mode 100644 +index 00000000..e2687fa7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -0,0 +1,218 @@ ++/* Optimized strncmp implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRNCMP __strncmp_aligned ++#else ++# define STRNCMP strncmp ++#endif ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ lu12i.w a5, 0x01010 ++ andi a3, a0, 0x7 ++ ori a5, a5, 0x101 ++ ++ andi a4, a1, 0x7 ++ bstrins.d a5, a5, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ addi.d a2, a2, -1 ++ slli.d a6, a5, 7 ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ ++ sub.d t5, t8, a3 ++ srl.d t3, t7, t2 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ orn t0, t0, t3 ++ orn t1, t1, t3 ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ and t2, t2, t3 ++ bne t0, t1, L(al_end) ++ sltu t4, a2, t5 ++ sub.d a2, a2, t5 ++ ++L(al_loop): ++ or t4, t2, t4 ++ bnez t4, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ ++ addi.d t8, t8, 8 ++ sltui t4, a2, 8 ++ addi.d a2, a2, -8 ++ sub.d t2, t0, a5 ++ ++ andn t3, a6, t0 ++ and t2, t2, t3 ++ beq t0, t1, L(al_loop) ++ addi.d a2, a2, 8 ++ ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t2, t2 ++ srli.d t4, t2, 3 ++ ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ ++ ++ andi t1, t1, 0xff ++ sltu t2, a2, t4 ++ sub.d a0, t0, t1 ++ masknez a0, a0, t2 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ ++L(unaligned): ++ slt a7, a4, a3 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a7 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ slli.d t3, a4, 3 ++ srl.d t5, t7, t3 ++ srl.d t0, t4, t2 ++ srl.d t1, t1, t3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t6, t8, a4 ++ 
++ sub.d a4, t2, t3 ++ sll.d t2, t7, t2 ++ sub.d t5, t8, a3 ++ orn t4, t4, t2 ++ ++ ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltu t7, a2, t5 ++ and t2, t2, t3 ++ ++ sub.d a3, zero, a4 ++ or t2, t2, t7 ++ bnez t2, L(un_end) ++ sub.d t7, t5, t6 ++ ++ sub.d a2, a2, t5 ++ sub.d t6, t8, t7 ++L(un_loop): ++ srl.d t5, t4, a4 ++ ldx.d t4, a0, t8 ++ ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a3 ++ or t0, t0, t5 ++ ++ ++ bne t0, t1, L(loop_not_equal) ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltui t5, a2, 8 ++ ++ and t2, t2, t3 ++ addi.d a2, a2, -8 ++ or t3, t2, t5 ++ beqz t3, L(un_loop) ++ ++ addi.d a2, a2, 8 ++L(un_end): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ sltu t5, a2, t6 ++ ++ and t2, t2, t3 ++ or t2, t2, t5 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ ++ ++ srl.d t0, t4, a4 ++ sub.d a2, a2, t6 ++L(not_equal): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ xor t4, t0, t1 ++ and t2, t2, t3 ++ or t2, t2, t4 ++ ctz.d t2, t2 ++ ++ bstrins.d t2, zero, 2, 0 ++ srli.d t4, t2, 3 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ ++ ++ masknez t0, t2, a7 ++ maskeqz t1, t3, a7 ++ sltu t2, a2, t4 ++ or a0, t0, t1 ++ ++ masknez a0, a0, t2 ++ jr ra ++L(loop_not_equal): ++ add.d a2, a2, t7 ++ b L(not_equal) ++END(STRNCMP) ++ ++libc_hidden_builtin_def (STRNCMP) +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +new file mode 100644 +index 00000000..0b4eee2a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -0,0 +1,208 @@ ++/* Optimized strncmp implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNCMP __strncmp_lsx ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ li.d t2, 16 ++ bne a3, a4, L(unaligned) ++ xor t0, a0, a3 ++ ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ vld vr1, t1, 0 ++ vreplgr2vr.b vr3, a3 ++ ++ ++ sub.d t2, t2, a3 ++ vadd.b vr3, vr3, vr2 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ bgeu t2, a2, L(al_early_end) ++ vsetanyeqz.b fcc0, vr3 ++ ++ bcnez fcc0, L(al_end) ++ add.d a3, a0, a2 ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 3, 0 ++ ++ sub.d a2, a3, a4 ++L(al_loop): ++ vld vr0, t0, 16 ++ vld vr1, t1, 16 ++ addi.d t0, t0, 16 ++ ++ ++ addi.d t1, t1, 16 ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ beq t0, a4, L(al_early_end) ++ ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++L(al_early_end): ++ vreplgr2vr.b vr4, a2 ++ vslt.b vr4, vr2, vr4 ++ ++ ++ vorn.v vr3, vr3, vr4 ++ b L(al_end) ++L(unaligned): ++ slt a5, a3, a4 ++ xor t0, a0, a1 ++ ++ maskeqz t0, t0, a5 ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0xf ++ ++ andi a4, a1, 0xf ++ xor t0, a0, a3 ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ ++ vld vr3, t1, 0 ++ sub.d t2, t2, a3 ++ vreplgr2vr.b vr4, a3 ++ vreplgr2vr.b vr5, a4 ++ ++ ++ vaddi.bu vr6, vr2, 16 ++ vsub.b vr7, vr4, vr5 ++ vsub.b vr6, vr6, vr7 ++ vadd.b vr4, vr2, vr4 ++ ++ vshuf.b vr1, vr3, vr3, vr6 ++ vshuf.b vr0, vr7, vr0, vr4 ++ vshuf.b vr1, vr7, vr1, vr4 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ bgeu t2, a2, L(un_early_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(un_end) ++ ++ add.d a6, a0, a2 ++ vslt.b vr5, vr2, vr5 ++ addi.d a7, a6, -1 ++ vor.v vr3, vr3, vr5 ++ ++ ++ bstrins.d a7, zero, 3, 0 ++ sub.d a2, a6, a7 ++L(un_loop): ++ vld vr0, t0, 16 ++ addi.d t0, t0, 16 ++ ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(has_zero) ++ beq t0, a7, L(end_with_len) ++ vor.v vr1, vr3, vr3 ++ ++ vld vr3, t1, 16 ++ addi.d t1, t1, 16 ++ vshuf.b vr1, vr3, vr1, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vsetanyeqz.b fcc0, vr4 ++ bceqz fcc0, L(un_loop) ++L(un_end): ++ vseqi.b vr4, vr4, 0 ++ ++ ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t2, a5 ++ ++ maskeqz t1, t3, a5 ++ or a0, t0, t1 ++ jr ra ++L(has_zero): ++ vshuf.b vr1, vr3, vr3, vr6 ++ ++ vseq.b vr4, vr0, vr1 ++ vmin.bu vr4, vr0, vr4 ++ bne t0, a7, L(un_end) ++L(un_early_end): ++ vreplgr2vr.b vr5, a2 ++ ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ b L(un_end) ++L(end_with_len): ++ sub.d a6, a3, a4 ++ ++ bgeu a6, a2, 1f ++ vld vr4, t1, 16 ++1: ++ vshuf.b vr1, vr4, vr3, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vreplgr2vr.b vr5, a2 ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ ++ b L(un_end) ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRNCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRNCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c 
b/sysdeps/loongarch/lp64/multiarch/strncmp.c +new file mode 100644 +index 00000000..af6d0bc4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strncmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strncmp __redirect_strncmp ++# include <string.h> ++# undef strncmp ++ ++# define SYMBOL_NAME strncmp ++# include "ifunc-strncmp.h" ++ ++libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp); ++# endif ++#endif +-- +2.33.0 +
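ifunc-strncmp.h above returns __strncmp_lsx only when SUPPORT_LSX holds and otherwise falls back to __strncmp_aligned; both have to honour the usual strncmp contract, restated below as a short C reference (illustration only, not the glibc code): at most n bytes are compared, bytes are compared as unsigned char, and the comparison stops at the first NUL.

/* Reference strncmp behaviour; simplified, not the glibc code.  */
#include <stddef.h>

int
ref_strncmp (const char *s1, const char *s2, size_t n)
{
  for (; n > 0; s1++, s2++, n--)
    {
      unsigned char c1 = (unsigned char) *s1;
      unsigned char c2 = (unsigned char) *s2;
      if (c1 != c2)
        return c1 - c2;          /* sign of the difference of unsigned bytes */
      if (c1 == '\0')
        return 0;                /* both strings end before n bytes */
    }
  return 0;                      /* first n bytes are equal */
}

The beqz a2, L(ret0) at the top of both assembly versions is the n == 0 case of this loop.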
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strnlen-aligned-lsx-.patch
Added
@@ -0,0 +1,465 @@ +From e494d32d3b76eee0d59cfab37789a356459b517a Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Thu, 24 Aug 2023 16:50:17 +0800 +Subject: PATCH 08/29 LoongArch: Add ifunc support for strnlen{aligned, lsx, + lasx} + +Based on the glibc microbenchmark, strnlen-aligned implementation could +reduce the runtime more than 10%, strnlen-lsx implementation could reduce +the runtime about 50%-78%, strnlen-lasx implementation could reduce the +runtime about 50%-88%. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ + .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++ + .../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++ + .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++ + 7 files changed, 382 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index afa51041..c4dd3143 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -3,6 +3,9 @@ sysdep_routines += \ + strlen-aligned \ + strlen-lsx \ + strlen-lasx \ ++ strnlen-aligned \ ++ strnlen-lsx \ ++ strnlen-lasx \ + strchr-aligned \ + strchr-lsx \ + strchr-lasx \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 25eb96b0..7cec0b77 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) + ) + ++ IFUNC_IMPL (i, name, strnlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) ++ ) ++ + IFUNC_IMPL (i, name, strchr, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +new file mode 100644 +index 00000000..5cf89810 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +@@ -0,0 +1,41 @@ ++/* Common definition for strnlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +new file mode 100644 +index 00000000..b900430a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -0,0 +1,102 @@ ++/* Optimized strnlen implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRNLEN __strnlen_aligned ++#else ++# define STRNLEN __strnlen ++#endif ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(out) ++ lu12i.w a2, 0x01010 ++ andi t1, a0, 0x7 ++ move t4, a0 ++ ++ bstrins.d a0, zero, 2, 0 ++ ori a2, a2, 0x101 ++ li.w t0, -1 ++ ld.d t2, a0, 0 ++ ++ slli.d t3, t1, 3 ++ bstrins.d a2, a2, 63, 32 ++ li.w t5, 8 ++ slli.d a3, a2, 7 ++ ++ sub.w t1, t5, t1 ++ sll.d t0, t0, t3 ++ orn t2, t2, t0 ++ sub.d t0, t2, a2 ++ ++ ++ andn t3, a3, t2 ++ and t0, t0, t3 ++ bnez t0, L(count_pos) ++ sub.d t5, a1, t1 ++ ++ bgeu t1, a1, L(out) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t2, a0, 0 ++ sub.d t0, t2, a2 ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 9 ++ and t0, t0, t1 ++ or t7, t0, t6 ++ ++ bnez t7, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t0, t2, a2 ++ ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 17 ++ and t0, t0, t1 ++ addi.d t5, t5, -16 ++ ++ or t7, t0, t6 ++ beqz t7, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t1, t0 ++ ++ sub.d a0, a0, t4 ++ srli.d t1, t1, 3 ++ add.d a0, t1, a0 ++ sltu t0, a0, a1 ++ ++ masknez t1, a1, t0 ++ maskeqz a0, a0, t0 ++ or a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ move a0, a1 ++ jr ra ++END(STRNLEN) ++ ++weak_alias (STRNLEN, strnlen) ++libc_hidden_builtin_def (STRNLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +new file mode 100644 +index 00000000..2c03d3d9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -0,0 +1,100 @@ ++/* Optimized strnlen implementation using loongarch LASX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lasx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x3f ++ li.d t3, 65 ++ sub.d a2, a0, t1 ++ ++ xvld xr0, a2, 0 ++ xvld xr1, a2, 32 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 5, 0 ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ beq a0, a4, L(out) ++ ++ xvmin.bu xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bceqz fcc0, L(loop) ++L(out): ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ cto.d t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +new file mode 100644 +index 00000000..b769a895 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -0,0 +1,89 @@ ++/* Optimized strnlen implementation using loongarch LSX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lsx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x1f ++ li.d t3, 33 ++ sub.d a2, a0, t1 ++ ++ vld vr0, a2, 0 ++ vld vr1, a2, 16 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 4, 0 ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ beq a0, a4, L(out) ++ ++ vmin.bu vr2, vr0, vr1 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++L(out): ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_builtin_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c +new file mode 100644 +index 00000000..38b7a25a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strnlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strnlen __redirect_strnlen ++# define __strnlen __redirect___strnlen ++# include <string.h> ++# undef __strnlen ++# undef strnlen ++ ++# define SYMBOL_NAME strnlen ++# include "ifunc-strnlen.h" ++ ++libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); ++weak_alias (__strnlen, strnlen); ++# ifdef SHARED ++__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); ++__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) ++ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); ++# endif ++#endif +-- +2.33.0 +
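The __strnlen_aligned code above scans eight bytes per iteration and detects a NUL with the classic zero-byte bit trick: a2 holds 0x0101010101010101, a3 holds that value shifted left by 7 (0x8080808080808080), and the sub.d/andn/and sequence yields a nonzero result exactly when the loaded word contains a zero byte, whose position ctz.d then recovers. A C rendering of that test follows, as an illustration only (the same constants appear as t5/t6 in strcpy-aligned.S).

/* The zero-byte test used by the aligned variants, in C; illustration only.  */
#include <stdint.h>
#include <stdio.h>

static int
word_has_zero_byte (uint64_t w)
{
  const uint64_t ones  = 0x0101010101010101ULL;  /* a2 in __strnlen_aligned */
  const uint64_t highs = 0x8080808080808080ULL;  /* a3 = ones << 7 */
  /* (w - ones) sets the 0x80 bit of a byte that was zero, and ~w & highs
     keeps that bit only where the original byte's high bit was clear, so the
     intersection is nonzero iff some byte of w is 0x00.  */
  return ((w - ones) & ~w & highs) != 0;
}

int
main (void)
{
  /* Little-endian loads of "abcdefgh" and of the same word with a NUL.  */
  printf ("%d\n", word_has_zero_byte (0x6867666564636261ULL));  /* prints 0 */
  printf ("%d\n", word_has_zero_byte (0x6867006564636261ULL));  /* prints 1 */
  return 0;
}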
View file
_service:tar_scm:LoongArch-Add-ifunc-support-for-strrchr-aligned-lsx-.patch
Added
@@ -0,0 +1,670 @@ +From d537d0ab45a55048c8da483e73be4448ddb45525 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:35:00 +0800 +Subject: PATCH 23/29 LoongArch: Add ifunc support for strrchr{aligned, lsx, + lasx} + +According to glibc strrchr microbenchmark test results, this implementation +could reduce the runtime time as following: + +Name Percent of rutime reduced +strrchr-lasx 10%-50% +strrchr-lsx 0%-50% +strrchr-aligned 5%-50% + +Generic strrchr is implemented by function strlen + memrchr, the lasx version +will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, +the lsx version will compare with generic strrchr implemented by strlen-lsx + +memrchr-lsx, the aligned version will compare with generic strrchr implemented +by strlen-aligned + memrchr-generic. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 3 + + .../lp64/multiarch/ifunc-impl-list.c | 8 + + .../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 ++++ + .../lp64/multiarch/strrchr-aligned.S | 170 +++++++++++++++++ + .../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 ++++ + 7 files changed, 578 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 39550bea..fe863e1b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -9,6 +9,9 @@ sysdep_routines += \ + strchr-aligned \ + strchr-lsx \ + strchr-lasx \ ++ strrchr-aligned \ ++ strrchr-lsx \ ++ strrchr-lasx \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 39a14f1d..529e2369 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) + ) + ++ IFUNC_IMPL (i, name, strrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) ++ ) ++ + IFUNC_IMPL (i, name, memcpy, + #if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +new file mode 100644 +index 00000000..bbb34089 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strrchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +new file mode 100644 +index 00000000..a73deb78 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +@@ -0,0 +1,170 @@ ++/* Optimized strrchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRRCHR __strrchr_aligned ++#else ++# define STRRCHR strrchr ++#endif ++ ++LEAF(STRRCHR, 6) ++ slli.d t0, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ andi a1, a1, 0xff ++ ori a2, a2, 0x101 ++ li.d t3, -1 ++ bstrins.d a2, a2, 63, 32 ++ ++ sll.d t5, t3, t0 ++ slli.d a3, a2, 7 ++ orn t4, t2, t5 ++ mul.d a1, a1, a2 ++ ++ sub.d t0, t4, a2 ++ andn t1, a3, t4 ++ and t1, t0, t1 ++ beqz t1, L(find_tail) ++ ++ ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t4, a1 ++ srl.d t0, t3, t0 ++ ++ orn t2, t2, t0 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a0, 7 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ ++L(find_tail): ++ addi.d a4, a0, 8 ++ addi.d a0, a0, 8 ++L(loop_ascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_ascii) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t0, a3, t2 ++ and t1, t1, t0 ++ bnez t1, L(tail) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_nonascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ++ bnez t1, L(tail) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ beqz t1, L(loop_nonascii) ++ addi.d a0, a0, -8 ++ ++L(tail): ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t2, a1 ++ srl.d t0, t3, t0 ++ ++ ++ orn t2, t2, t0 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ bnez t1, L(count_pos) ++L(find_loop): ++ beq a0, a4, L(find_end) ++ ld.d t2, a0, -8 ++ ++ addi.d a0, a0, -8 ++ xor t2, t2, a1 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ beqz t1, L(find_loop) ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++L(count_pos): ++ ctz.d t0, t1 ++ addi.d a0, a0, 7 ++ ++ srli.d t0, t0, 3 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ xor t2, t4, a1 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a4, -1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +new file mode 100644 +index 00000000..5a6e2297 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +@@ -0,0 +1,176 @@ ++/* Optimized strrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lasx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ li.d t2, -1 ++ xvreplgr2vr.b xr4, a1 ++ xvmsknz.b xr2, xr0 ++ xvmsknz.b xr3, xr1 ++ ++ xvpickve.w xr5, xr2, 4 ++ xvpickve.w xr6, xr3, 4 ++ vilvl.h vr2, vr5, vr2 ++ vilvl.h vr3, vr6, vr3 ++ ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa2 ++ sra.d t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.d t1, fa1 ++ cto.d t0, t0 ++ ++ srl.d t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 63 ++ andn t1, t1, t3 ++ ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 64 ++L(loop): ++ xvld xr2, a0, 64 ++ xvld xr3, a0, 96 ++ addi.d a0, a0, 64 ++ ++ xvmin.bu xr5, xr2, xr3 ++ xvsetanyeqz.b fcc0, xr5 ++ bceqz fcc0, L(loop) ++ xvmsknz.b xr5, xr2 ++ ++ ++ xvmsknz.b xr6, xr3 ++ xvpickve.w xr7, xr5, 4 ++ xvpickve.w xr8, xr6, 4 ++ vilvl.h vr5, vr7, vr5 ++ ++ vilvl.h vr6, vr8, vr6 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ xvmsknz.b xr2, xr2 ++ ++ xvmsknz.b xr3, xr3 ++ xvpickve.w xr7, xr2, 4 ++ xvpickve.w xr8, xr3, 4 ++ vilvl.h vr2, vr7, vr2 ++ ++ vilvl.h vr3, vr8, vr3 ++ vilvl.w vr5, vr6, vr5 ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa5 ++ ++ ++ movfr2gr.d t1, fa2 ++ slli.d t3, t2, 1 ++ cto.d t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.d t0, t1 ++ addi.d a0, a0, 63 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ xvld xr2, a0, -64 ++ ++ xvld xr3, a0, -32 ++ addi.d a0, a0, -64 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ ++ ++ xvmax.bu xr5, xr2, xr3 ++ xvseteqz.v fcc0, xr5 ++ bcnez fcc0, L(find_loop) ++ xvmsknz.b xr0, xr2 ++ ++ xvmsknz.b xr1, xr3 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ addi.d a0, a0, 63 ++ ++ clz.d t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++ ++L(find_end): ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ movfr2gr.d t1, fa1 ++ addi.d a0, a2, 63 ++ srl.d t1, t1, a2 ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +new file mode 100644 +index 00000000..8f2fd22e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +@@ -0,0 +1,144 @@ ++/* Optimized strrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lsx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t2, -1 ++ vreplgr2vr.b vr4, a1 ++ vmsknz.b vr2, vr0 ++ vmsknz.b vr3, vr1 ++ ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ sra.w t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.s t1, fa1 ++ cto.w t0, t0 ++ ++ srl.w t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 31 ++ andn t1, t1, t3 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 32 ++L(loop): ++ vld vr2, a0, 32 ++ vld vr3, a0, 48 ++ addi.d a0, a0, 32 ++ ++ vmin.bu vr5, vr2, vr3 ++ vsetanyeqz.b fcc0, vr5 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr5, vr2 ++ ++ vmsknz.b vr6, vr3 ++ vilvl.h vr5, vr6, vr5 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa5 ++ ++ ++ movfr2gr.s t1, fa2 ++ slli.d t3, t2, 1 ++ cto.w t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.w t0, t1 ++ addi.d a0, a0, 31 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ vld vr2, a0, -32 ++ ++ vld vr3, a0, -16 ++ addi.d a0, a0, -32 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ ++ vmax.bu vr5, vr2, vr3 ++ vseteqz.v fcc0, vr5 ++ bcnez fcc0, L(find_loop) ++ vmsknz.b vr0, vr2 ++ ++ vmsknz.b vr1, vr3 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ addi.d a0, a0, 31 ++ ++ clz.w t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ movfr2gr.s t1, fa1 ++ addi.d a0, a2, 31 ++ srl.w t1, t1, a2 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c +new file mode 100644 +index 00000000..d9c9f660 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strrchr __redirect_strrchr ++# include <string.h> ++# undef strrchr ++ ++# define SYMBOL_NAME strrchr ++# include "ifunc-strrchr.h" ++ ++libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); ++weak_alias (strrchr, rindex) ++# ifdef SHARED ++__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); ++# endif ++ ++#endif +-- +2.33.0 +
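The baseline referred to in the commit message above — generic strrchr built from strlen plus memrchr — can be written out in a few lines of C. The sketch below mirrors glibc's generic string/strrchr.c and is included only to make the benchmark pairing (for example __strrchr_lasx versus strlen-lasx + memrchr-lasx) concrete; it is not LoongArch-specific code.

#define _GNU_SOURCE   /* memrchr is a GNU extension */
#include <string.h>

/* Last occurrence of C in S.  Scanning strlen (s) + 1 bytes backwards also
   covers the c == '\0' case, where a pointer to the terminator is returned.  */
char *
generic_strrchr (const char *s, int c)
{
  return (char *) memrchr (s, c, strlen (s) + 1);
}

The single-pass __strrchr_{aligned,lsx,lasx} implementations above avoid walking the string twice, which is essentially where the quoted 0%-50% runtime reduction comes from.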
View file
_service:tar_scm:LoongArch-Add-lasx-lsx-support-for-_dl_runtime_profi.patch
Added
@@ -0,0 +1,626 @@ +From b5979df8ad07823c79a934c1fa0a91ec0abffb61 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 8 Sep 2023 14:10:55 +0800 +Subject: PATCH 20/29 LoongArch: Add lasx/lsx support for + _dl_runtime_profile. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/bits/link.h | 24 ++- + sysdeps/loongarch/bits/link_lavcurrent.h | 25 +++ + sysdeps/loongarch/dl-audit-check.h | 23 +++ + sysdeps/loongarch/dl-link.sym | 8 +- + sysdeps/loongarch/dl-machine.h | 11 +- + sysdeps/loongarch/dl-trampoline.S | 177 +---------------- + sysdeps/loongarch/dl-trampoline.h | 242 +++++++++++++++++++++++ + 7 files changed, 331 insertions(+), 179 deletions(-) + create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h + create mode 100644 sysdeps/loongarch/dl-audit-check.h + +diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h +index 7fa61312..00f6f25f 100644 +--- a/sysdeps/loongarch/bits/link.h ++++ b/sysdeps/loongarch/bits/link.h +@@ -20,10 +20,26 @@ + #error "Never include <bits/link.h> directly; use <link.h> instead." + #endif + ++#ifndef __loongarch_soft_float ++typedef float La_loongarch_vr ++ __attribute__ ((__vector_size__ (16), __aligned__ (16))); ++typedef float La_loongarch_xr ++ __attribute__ ((__vector_size__ (32), __aligned__ (16))); ++ ++typedef union ++{ ++ double fpreg4; ++ La_loongarch_vr vr2; ++ La_loongarch_xr xr1; ++} La_loongarch_vector __attribute__ ((__aligned__ (16))); ++#endif ++ + typedef struct La_loongarch_regs + { + unsigned long int lr_reg8; /* a0 - a7 */ +- double lr_fpreg8; /* fa0 - fa7 */ ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lr_vec8; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ ++#endif + unsigned long int lr_ra; + unsigned long int lr_sp; + } La_loongarch_regs; +@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval + { + unsigned long int lrv_a0; + unsigned long int lrv_a1; +- double lrv_fa0; +- double lrv_fa1; ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lrv_vec0; ++ La_loongarch_vector lrv_vec1; ++#endif + } La_loongarch_retval; + + __BEGIN_DECLS +diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h +new file mode 100644 +index 00000000..15f1eb84 +--- /dev/null ++++ b/sysdeps/loongarch/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _LINK_H ++# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. 
*/ ++#define LAV_CURRENT 3 +diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h +new file mode 100644 +index 00000000..a139c939 +--- /dev/null ++++ b/sysdeps/loongarch/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym +index 868ab7c6..b534968e 100644 +--- a/sysdeps/loongarch/dl-link.sym ++++ b/sysdeps/loongarch/dl-link.sym +@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) + DL_SIZEOF_RV sizeof(struct La_loongarch_retval) + + DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) +-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) ++#endif + DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) + DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) + + DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) +-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) ++#endif +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 066bb233..8a2db9de 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + #if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lasx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lsx (void) attribute_hidden; + #endif + extern void _dl_runtime_resolve (void) attribute_hidden; + extern void _dl_runtime_profile (void) attribute_hidden; +@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + end in this function. 
*/ + if (profile != 0) + { +- gotplt0 = (ElfW(Addr)) &_dl_runtime_profile; ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx; ++ else if (SUPPORT_LSX) ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx; ++ else ++#endif ++ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index 8fd91469..bb449ecf 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -22,190 +22,21 @@ + #if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx ++#define _dl_runtime_profile _dl_runtime_profile_lasx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LASX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + + #define USE_LSX + #define _dl_runtime_resolve _dl_runtime_resolve_lsx ++#define _dl_runtime_profile _dl_runtime_profile_lsx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LSX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + #endif + + #include "dl-trampoline.h" +- +-#include "dl-link.h" +- +-ENTRY (_dl_runtime_profile) +- /* LoongArch we get called with: +- t0 linkr_map pointer +- t1 the scaled offset stored in t0, which can be used +- to calculate the offset of the current symbol in .rela.plt +- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function +- t3 dl resolver entry point, no use in this function +- +- Stack frame layout: +- sp, #96 La_loongarch_regs +- sp, #48 La_loongarch_retval +- sp, #40 frame size return from pltenter +- sp, #32 dl_profile_call saved a1 +- sp, #24 dl_profile_call saved a0 +- sp, #16 T1 +- sp, #0 ra, fp <- fp +- */ +- +-# define OFFSET_T1 16 +-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 +-# define OFFSET_RV OFFSET_FS + 8 +-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +- +-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) +- +- /* Save arguments to stack. */ +- ADDI sp, sp, -SF_SIZE +- REG_S ra, sp, 0 +- REG_S fp, sp, 8 +- +- or fp, sp, zero +- +- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- /* Update .got.plt and obtain runtime address of callee. 
*/ +- SLLI a1, t1, 1 +- or a0, t0, zero +- ADD a1, a1, t1 +- or a2, ra, zero /* return addr */ +- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ +- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- +- la t2, _dl_profile_fixup +- jirl ra, t2, 0 +- +- REG_L t3, fp, OFFSET_FS +- bge t3, zero, 1f +- +- /* Save the return. */ +- or t4, v0, zero +- +- /* Restore arguments from stack. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- REG_L ra, fp, 0 +- REG_L fp, fp, SZREG +- +- ADDI sp, sp, SF_SIZE +- jirl zero, t4, 0 +- +-1: +- /* The new frame size is in t3. */ +- SUB sp, fp, t3 +- BSTRINS sp, zero, 3, 0 +- +- REG_S a0, fp, OFFSET_T1 +- +- or a0, sp, zero +- ADDI a1, fp, SF_SIZE +- or a2, t3, zero +- la t5, memcpy +- jirl ra, t5, 0 +- +- REG_L t6, fp, OFFSET_T1 +- +- /* Call the function. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- jirl ra, t6, 0 +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 +- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* Setup call to pltexit. 
*/ +- REG_L a0, fp, OFFSET_SAVED_CALL_A0 +- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- ADDI a2, fp, OFFSET_RG +- ADDI a3, fp, OFFSET_RV +- la t7, _dl_audit_pltexit +- jirl ra, t7, 0 +- +- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 +- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 +- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* RA from within La_loongarch_reg. */ +- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA +- or sp, fp, zero +- ADDI sp, sp, SF_SIZE +- REG_S fp, fp, SZREG +- +- jirl zero, ra, 0 +- +-END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h +index 99fcacab..e298439d 100644 +--- a/sysdeps/loongarch/dl-trampoline.h ++++ b/sysdeps/loongarch/dl-trampoline.h +@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) + /* Invoke the callee. */ + jirl zero, t1, 0 + END (_dl_runtime_resolve) ++ ++#include "dl-link.h" ++ ++ENTRY (_dl_runtime_profile) ++ /* LoongArch we get called with: ++ t0 linkr_map pointer ++ t1 the scaled offset stored in t0, which can be used ++ to calculate the offset of the current symbol in .rela.plt ++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function ++ t3 dl resolver entry point, no use in this function ++ ++ Stack frame layout: ++ sp, #208 La_loongarch_regs ++ sp, #128 La_loongarch_retval // align: 16 ++ sp, #112 frame size return from pltenter ++ sp, #80 dl_profile_call saved vec1 ++ sp, #48 dl_profile_call saved vec0 // align: 16 ++ sp, #32 dl_profile_call saved a1 ++ sp, #24 dl_profile_call saved a0 ++ sp, #16 T1 ++ sp, #0 ra, fp <- fp ++ */ ++ ++# define OFFSET_T1 16 ++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 ++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 ++# define OFFSET_RV OFFSET_FS + 8 + 8 ++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV ++ ++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) ++ ++ /* Save arguments to stack. 
*/ ++ ADDI sp, sp, -SF_SIZE ++ REG_S ra, sp, 0 ++ REG_S fp, sp, 8 ++ ++ or fp, sp, zero ++ ++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ /* Update .got.plt and obtain runtime address of callee. */ ++ SLLI a1, t1, 1 ++ or a0, t0, zero ++ ADD a1, a1, t1 ++ or a2, ra, zero /* return addr */ ++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ ++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ++ la t2, _dl_profile_fixup ++ jirl ra, t2, 0 ++ ++ REG_L t3, fp, OFFSET_FS ++ bge t3, zero, 1f ++ ++ /* Save the return. */ ++ or t4, v0, zero ++ ++ /* Restore arguments from stack. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ REG_L ra, fp, 0 ++ REG_L fp, fp, SZREG ++ ++ ADDI sp, sp, SF_SIZE ++ jirl zero, t4, 0 ++ ++1: ++ /* The new frame size is in t3. */ ++ SUB sp, fp, t3 ++ BSTRINS sp, zero, 3, 0 ++ ++ REG_S a0, fp, OFFSET_T1 ++ ++ or a0, sp, zero ++ ADDI a1, fp, SF_SIZE ++ or a2, t3, zero ++ la t5, memcpy ++ jirl ra, t5, 0 ++ ++ REG_L t6, fp, OFFSET_T1 ++ ++ /* Call the function. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ jirl ra, t6, 0 ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* Setup call to pltexit. */ ++ REG_L a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ADDI a2, fp, OFFSET_RG ++ ADDI a3, fp, OFFSET_RV ++ la t7, _dl_audit_pltexit ++ jirl ra, t7, 0 ++ ++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 ++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* RA from within La_loongarch_reg. 
*/ ++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA ++ or sp, fp, zero ++ ADDI sp, sp, SF_SIZE ++ REG_S fp, fp, SZREG ++ ++ jirl zero, ra, 0 ++ ++END (_dl_runtime_profile) +-- +2.33.0 +
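The widened La_loongarch_regs / La_loongarch_retval layouts above exist for the sake of LD_AUDIT modules, and the LAV_CURRENT bump to 3 together with the new _dl_audit_check_version means a module built against the old pre-vector layout is rejected at load time instead of reading a mismatched stack frame. The sketch below is a minimal, generic audit module following rtld-audit(7), shown only to illustrate that version handshake; it is not part of the patch.

#define _GNU_SOURCE
#include <link.h>
#include <stdint.h>
#include <stdio.h>

/* The dynamic linker calls la_version first; returning LAV_CURRENT accepts
   the interface version.  After this patch LoongArch's LAV_CURRENT is 3,
   and _dl_audit_check_version only admits an exact match.  */
unsigned int
la_version (unsigned int version)
{
  fprintf (stderr, "rtld-audit interface offered: %u\n", version);
  return LAV_CURRENT;
}

unsigned int
la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
{
  fprintf (stderr, "object loaded: %s\n",
           map->l_name[0] ? map->l_name : "(main program)");
  return LA_FLG_BINDTO | LA_FLG_BINDFROM;
}

Built with "gcc -shared -fPIC -o audit.so audit.c" and run with LD_AUDIT=./audit.so, PLT bindings in the audited objects go through the _dl_runtime_profile{,_lsx,_lasx} entry points selected in elf_machine_runtime_setup above.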
View file
_service:tar_scm:LoongArch-Add-minuimum-binutils-required-version.patch
Added
@@ -0,0 +1,102 @@ +From 7353f21f6ed1754b67e455e2b80123787efa9e91 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:43 +0800 +Subject: PATCH 02/29 LoongArch: Add minuimum binutils required version + +LoongArch glibc can add some LASX/LSX vector instructions codes, +change the required minimum binutils version to 2.41 which could +support vector instructions. HAVE_LOONGARCH_VEC_ASM is removed +accordingly. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + config.h.in | 5 ----- + sysdeps/loongarch/configure | 5 ++--- + sysdeps/loongarch/configure.ac | 4 ++-- + sysdeps/loongarch/dl-machine.h | 4 ++-- + sysdeps/loongarch/dl-trampoline.S | 2 +- + 5 files changed, 7 insertions(+), 13 deletions(-) + +diff --git a/config.h.in b/config.h.in +index 0dedc124..44a34072 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -141,11 +141,6 @@ + /* LOONGARCH floating-point ABI for ld.so. */ + #undef LOONGARCH_ABI_FRLEN + +-/* Assembler support LoongArch LASX/LSX vector instructions. +- This macro becomes obsolete when glibc increased the minimum +- required version of GNU 'binutils' to 2.41 or later. */ +-#define HAVE_LOONGARCH_VEC_ASM 0 +- + /* Linux specific: minimum supported kernel version. */ + #undef __LINUX_KERNEL_VERSION + +diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure +index 5843c7cf..395ddc92 100644 +--- a/sysdeps/loongarch/configure ++++ b/sysdeps/loongarch/configure +@@ -128,8 +128,7 @@ rm -f conftest* + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5 + printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; } +-if test $libc_cv_loongarch_vec_asm = yes; then +- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h +- ++if test $libc_cv_loongarch_vec_asm = no; then ++ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 + fi + +diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac +index ba89d834..989287c6 100644 +--- a/sysdeps/loongarch/configure.ac ++++ b/sysdeps/loongarch/configure.ac +@@ -74,6 +74,6 @@ else + libc_cv_loongarch_vec_asm=no + fi + rm -f conftest*) +-if test $libc_cv_loongarch_vec_asm = yes; then +- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM) ++if test $libc_cv_loongarch_vec_asm = no; then ++ AC_MSG_ERROR(binutils version is too old, use 2.41 or newer version) + fi +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 51ce9af8..066bb233 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -270,7 +270,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + /* If using PLTs, fill in the first two entries of .got.plt. */ + if (l->l_infoDT_JMPREL) + { +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; + #endif +@@ -300,7 +300,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope, + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. 
*/ +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx; + else if (SUPPORT_LSX) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index f6ba5e44..8fd91469 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -19,7 +19,7 @@ + #include <sysdep.h> + #include <sys/asm.h> + +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx + #include "dl-trampoline.h" +-- +2.33.0 +
View file
_service:tar_scm:LoongArch-Change-loongarch-to-LoongArch-in-comments.patch
Added
@@ -0,0 +1,277 @@ +From e5ccd79e81de7ad5821fde83875973e878d85d4b Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Mon, 28 Aug 2023 10:08:40 +0800 +Subject: PATCH 19/29 LoongArch: Change loongarch to LoongArch in comments + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S | 2 +- + sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S | 2 +- + 24 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +index 299dd49c..7eb34395 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy_aligned implementation using basic Loongarch instructions. ++/* Optimized memcpy_aligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +index 4aae5bf8..ae148df5 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy implementation using Loongarch LASX instructions. ++/* Optimized memcpy implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +index 6ebbe7a2..feb2bb0e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized memcpy implementation using Loongarch LSX instructions. ++/* Optimized memcpy implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +index 8e60a22d..31019b13 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -1,4 +1,4 @@ +-/* Optimized unaligned memcpy implementation using basic Loongarch instructions. ++/* Optimized unaligned memcpy implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +index 5354f383..a02114c0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove_aligned implementation using basic Loongarch instructions. ++/* Optimized memmove_aligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +index ff68e7a2..95d8ee7b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove implementation using Loongarch LASX instructions. ++/* Optimized memmove implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +index 9e1502a7..8a936770 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove implementation using Loongarch LSX instructions. ++/* Optimized memmove implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +index 90a64b6b..3284ce25 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -1,4 +1,4 @@ +-/* Optimized memmove_unaligned implementation using basic Loongarch instructions. ++/* Optimized memmove_unaligned implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +index 5fb01806..62020054 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strchr implementation using basic Loongarch instructions. ++/* Optimized strchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +index 254402da..4d3cc588 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchr implementation using loongarch LASX SIMD instructions. 
++/* Optimized strchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +index dae98b0a..8b78c35c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using loongarch LSX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +index 1c01a023..20856a06 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using basic Loongarch instructions. ++/* Optimized strchrnul implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +index d45495e4..4753d4ce 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. ++/* Optimized strchrnul implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +index 07d793ae..671e740c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. ++/* Optimized strchrnul implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +index f5f4f336..ba1f9667 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strcmp implementation using basic Loongarch instructions. ++/* Optimized strcmp implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +index 2e177a38..091c8c9e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strcmp implementation using Loongarch LSX instructions. ++/* Optimized strcmp implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +index e9e1d2fc..ed0548e4 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using basic Loongarch instructions. ++/* Optimized strlen implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +index 258c47ce..91342f34 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using loongarch LASX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +index b194355e..b09c12e0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strlen implementation using Loongarch LSX SIMD instructions. ++/* Optimized strlen implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +index e2687fa7..f63de872 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strncmp implementation using basic Loongarch instructions. ++/* Optimized strncmp implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +index 0b4eee2a..83cb801d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strncmp implementation using Loongarch LSX instructions. ++/* Optimized strncmp implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +index b900430a..a8296a1b 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using basic Loongarch instructions. ++/* Optimized strnlen implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +index 2c03d3d9..aa6c812d 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using loongarch LASX instructions ++/* Optimized strnlen implementation using LoongArch LASX instructions + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +index b769a895..d0febe3e 100644 +--- a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -1,4 +1,4 @@ +-/* Optimized strnlen implementation using loongarch LSX instructions ++/* Optimized strnlen implementation using LoongArch LSX instructions + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. +-- +2.33.0 +
_service:tar_scm:LoongArch-Change-to-put-magic-number-to-.rodata-sect.patch
Added
@@ -0,0 +1,67 @@ +From fb72c81f9894b23797f6e2e066532c0963f5155f Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Wed, 13 Sep 2023 15:35:01 +0800 +Subject: PATCH 24/29 LoongArch: Change to put magic number to .rodata + section + +Change to put magic number to .rodata section in memmove-lsx, and use +pcalau12i and %pc_lo12 with vld to get the data. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + .../loongarch/lp64/multiarch/memmove-lsx.S | 20 +++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +index 8a936770..5eb819ef 100644 +--- a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -209,13 +209,10 @@ L(al_less_16): + nop + + +-L(magic_num): +- .dword 0x0706050403020100 +- .dword 0x0f0e0d0c0b0a0908 + L(unaligned): +- pcaddi t2, -4 ++ pcalau12i t2, %pc_hi20(L(INDEX)) + bstrins.d a1, zero, 3, 0 +- vld vr8, t2, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) + vld vr0, a1, 0 + + vld vr1, a1, 16 +@@ -413,13 +410,10 @@ L(back_al_less_16): + vst vr1, a0, 0 + jr ra + +-L(magic_num_2): +- .dword 0x0706050403020100 +- .dword 0x0f0e0d0c0b0a0908 + L(back_unaligned): +- pcaddi t2, -4 ++ pcalau12i t2, %pc_hi20(L(INDEX)) + bstrins.d a4, zero, 3, 0 +- vld vr8, t2, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) + vld vr0, a4, 0 + + vld vr1, a4, -16 +@@ -529,6 +523,12 @@ L(back_un_less_16): + jr ra + END(MEMMOVE_NAME) + ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ + libc_hidden_builtin_def (MEMCPY_NAME) + libc_hidden_builtin_def (MEMMOVE_NAME) + #endif +-- +2.33.0 +
_service:tar_scm:LoongArch-Micro-optimize-LD_PCREL.patch
Added
@@ -0,0 +1,44 @@ +From 7f703cf758c4f185dd62f2a4f463002bb514af16 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 27 Aug 2023 00:36:51 +0800 +Subject: PATCH 13/29 LoongArch: Micro-optimize LD_PCREL + +We are requiring Binutils >= 2.41, so explicit relocation syntax is +always supported by the assembler. Use it to reduce one instruction. + +Signed-off-by: Xi Ruoyao <xry111@xry111.site> +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/pointer_guard.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +index b25e353b..d6c78687 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h ++++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +@@ -19,17 +19,15 @@ + #ifndef POINTER_GUARD_H + #define POINTER_GUARD_H + +-/* Load a got-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a got-relative EXPR into register G. */ + #define LD_GLOBAL(G, EXPR) \ + la.global G, EXPR; \ + REG_L G, G, 0; + +-/* Load a pc-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a pc-relative EXPR into register G. */ + #define LD_PCREL(G, EXPR) \ +- la.pcrel G, EXPR; \ +- REG_L G, G, 0; ++ pcalau12i G, %pc_hi20(EXPR); \ ++ REG_L G, G, %pc_lo12(EXPR); + + #if (IS_IN (rtld) \ + || (!defined SHARED && (IS_IN (libc) \ +-- +2.33.0 +
_service:tar_scm:LoongArch-Redefine-macro-LEAF-ENTRY.patch
Added
@@ -0,0 +1,65 @@ +From 8dcd8c837df2e3cf81675522487697522f1542f8 Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:42 +0800 +Subject: PATCH 01/29 LoongArch: Redefine macro LEAF/ENTRY. + +The following usage of macro LEAF/ENTRY are all feasible: +1. LEAF(fcn) -- the align value of fcn is .align 3(default value) +2. LEAF(fcn, 6) -- the align value of fcn is .align 6 + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/sys/asm.h | 36 ++++++++++++++++++++++++++---------- + 1 file changed, 26 insertions(+), 10 deletions(-) + +diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h +index d1a279b8..c5eb8afa 100644 +--- a/sysdeps/loongarch/sys/asm.h ++++ b/sysdeps/loongarch/sys/asm.h +@@ -39,16 +39,32 @@ + #define FREG_L fld.d + #define FREG_S fst.d + +-/* Declare leaf routine. */ +-#define LEAF(symbol) \ +- .text; \ +- .globl symbol; \ +- .align 3; \ +- cfi_startproc; \ +- .type symbol, @function; \ +- symbol: +- +-#define ENTRY(symbol) LEAF (symbol) ++/* Declare leaf routine. ++ The usage of macro LEAF/ENTRY is as follows: ++ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value) ++ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6 ++*/ ++#define LEAF_IMPL(symbol, aln, ...) \ ++ .text; \ ++ .globl symbol; \ ++ .align aln; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++ ++#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3) ++#define ENTRY(...) LEAF(__VA_ARGS__) ++ ++#define LEAF_NO_ALIGN(symbol) \ ++ .text; \ ++ .globl symbol; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol) ++ + + /* Mark end of function. */ + #undef END +-- +2.33.0 +
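The default alignment in this patch comes from argument-list padding: LEAF(...) forwards the caller's arguments and appends 3, while LEAF_IMPL consumes only its first two parameters, so an unneeded trailing default is swallowed by its "...". A minimal C sketch of the same trick, with hypothetical ALIGN/ALIGN_IMPL names; like the glibc macro itself, it relies on the compiler accepting an empty variadic argument (GCC does):

    #include <stdio.h>

    /* Same shape as LEAF/LEAF_IMPL above: caller arguments first,
       the default alignment appended last, only the first two used.  */
    #define ALIGN_IMPL(name, aln, ...)  printf ("%s -> .align %d\n", #name, aln)
    #define ALIGN(...)                  ALIGN_IMPL (__VA_ARGS__, 3)

    int
    main (void)
    {
      ALIGN (fcn);     /* expands to ALIGN_IMPL (fcn, 3)    -> ".align 3" */
      ALIGN (fcn, 6);  /* expands to ALIGN_IMPL (fcn, 6, 3) -> ".align 6" */
      return 0;
    }

This is why both spellings used by the later string patches, plain LEAF(symbol) and LEAF(STRLEN, 6), assemble with the intended alignment.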
_service:tar_scm:LoongArch-Remove-support-code-for-old-linker-in-star.patch
Added
@@ -0,0 +1,56 @@ +From f8d66a269cb6f1a7087afadf3375bdf0553abf53 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 27 Aug 2023 00:36:50 +0800 +Subject: PATCH 12/29 LoongArch: Remove support code for old linker in + start.S + +We are requiring Binutils >= 2.41, so la.pcrel always works here. + +Signed-off-by: Xi Ruoyao <xry111@xry111.site> +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/start.S | 19 +++---------------- + 1 file changed, 3 insertions(+), 16 deletions(-) + +diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S +index e9d82033..bf6bfc9e 100644 +--- a/sysdeps/loongarch/start.S ++++ b/sysdeps/loongarch/start.S +@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT) + cfi_undefined (1) + or a5, a0, zero /* rtld_fini */ + +-#if ENABLE_STATIC_PIE +-/* For static PIE, the GOT cannot be used in _start because the GOT entries are +- offsets instead of real addresses before __libc_start_main. +- __libc_start_main and/or main may be not local, so we rely on the linker to +- produce PLT entries for them. GNU ld >= 2.40 supports this. */ +-# define LA la.pcrel +-#else +-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local +- function correctly. We deem these old linkers failing to support static PIE +- and load the addresses from GOT. */ +-# define LA la.got +-#endif +- +- LA a0, t0, main ++ la.pcrel a0, t0, main + REG_L a1, sp, 0 + ADDI a2, sp, SZREG + +@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT) + move a4, zero /* used to be fini */ + or a6, sp, zero /* stack_end */ + +- LA ra, t0, __libc_start_main ++ la.pcrel ra, t0, __libc_start_main + jirl ra, ra, 0 + +- LA ra, t0, abort ++ la.pcrel ra, t0, abort + jirl ra, ra, 0 + END (ENTRY_POINT) +-- +2.33.0 +
_service:tar_scm:LoongArch-Replace-deprecated-v0-with-a0-to-eliminate.patch
Added
@@ -0,0 +1,28 @@ +From b4b4bb7c9220a0bbdf5aec0ac8c1de1d22329280 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Thu, 14 Sep 2023 19:48:24 +0800 +Subject: PATCH 21/29 LoongArch: Replace deprecated $v0 with $a0 to eliminate + 'as' Warnings. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/dl-machine.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 8a2db9de..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void) + or $a0, $sp, $zero \n\ + bl _dl_start \n\ + # Stash user entry point in s0. \n\ +- or $s0, $v0, $zero \n\ ++ or $s0, $a0, $zero \n\ + # Load the original argument count. \n\ + ld.d $a1, $sp, 0 \n\ + # Call _dl_init (struct link_map *main_map, int argc, \ +-- +2.33.0 +
_service:tar_scm:LoongArch-Unify-Register-Names.patch
Added
@@ -0,0 +1,81 @@ +From 458ab6d5f39cca1cabd83abd2022f67491f6f5ed Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Fri, 20 Oct 2023 09:20:02 +0800 +Subject: PATCH 27/29 LoongArch: Unify Register Names. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/__longjmp.S | 20 ++++++++++---------- + sysdeps/loongarch/setjmp.S | 18 +++++++++--------- + 2 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S +index cbde1946..e87ce311 100644 +--- a/sysdeps/loongarch/__longjmp.S ++++ b/sysdeps/loongarch/__longjmp.S +@@ -43,18 +43,18 @@ ENTRY (__longjmp) + REG_L s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_L $f24, a0, 13*SZREG + 0*SZFREG +- FREG_L $f25, a0, 13*SZREG + 1*SZFREG +- FREG_L $f26, a0, 13*SZREG + 2*SZFREG +- FREG_L $f27, a0, 13*SZREG + 3*SZFREG +- FREG_L $f28, a0, 13*SZREG + 4*SZFREG +- FREG_L $f29, a0, 13*SZREG + 5*SZFREG +- FREG_L $f30, a0, 13*SZREG + 6*SZFREG +- FREG_L $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_L fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_L fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_L fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_L fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_L fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_L fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_L fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_L fs7, a0, 13*SZREG + 7*SZFREG + #endif + +- sltui a0,a1,1 ++ sltui a0, a1, 1 + ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + + END (__longjmp) +diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S +index 6c7065cd..b6e4f727 100644 +--- a/sysdeps/loongarch/setjmp.S ++++ b/sysdeps/loongarch/setjmp.S +@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp) + REG_S s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_S $f24, a0, 13*SZREG + 0*SZFREG +- FREG_S $f25, a0, 13*SZREG + 1*SZFREG +- FREG_S $f26, a0, 13*SZREG + 2*SZFREG +- FREG_S $f27, a0, 13*SZREG + 3*SZFREG +- FREG_S $f28, a0, 13*SZREG + 4*SZFREG +- FREG_S $f29, a0, 13*SZREG + 5*SZFREG +- FREG_S $f30, a0, 13*SZREG + 6*SZFREG +- FREG_S $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_S fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_S fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_S fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_S fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_S fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_S fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_S fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_S fs7, a0, 13*SZREG + 7*SZFREG + #endif + + #if !IS_IN (libc) && IS_IN(rtld) + li.w v0, 0 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + #else + b __sigjmp_save + #endif +-- +2.33.0 +
_service:tar_scm:LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch
Added
@@ -0,0 +1,24 @@ +From 4828d1aa0028e819a5fb336d962e8f7cbfedf8b4 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Mon, 23 Oct 2023 15:53:38 +0800 +Subject: PATCH 28/29 LoongArch: Update hwcap.h to sync with LoongArch + kernel. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +index 5104b69c..7acec23d 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +@@ -35,3 +35,4 @@ + #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) + #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) + #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#define HWCAP_LOONGARCH_PTW (1 << 13) +-- +2.33.0 +
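The SUPPORT_LSX / SUPPORT_LASX tests used by the ifunc selectors in the other patches are simply these HWCAP bits checked against GLRO(dl_hwcap); user code can make the equivalent runtime query with getauxval. A minimal sketch, assuming a LoongArch glibc whose <sys/auxv.h> exposes the HWCAP_LOONGARCH_* constants from this bits/hwcap.h:

    #include <stdio.h>
    #include <sys/auxv.h>   /* getauxval, AT_HWCAP, HWCAP_LOONGARCH_* */

    int
    main (void)
    {
      unsigned long hwcap = getauxval (AT_HWCAP);

      printf ("LSX:  %d\n", (hwcap & HWCAP_LOONGARCH_LSX) != 0);
      printf ("LASX: %d\n", (hwcap & HWCAP_LOONGARCH_LASX) != 0);
    #ifdef HWCAP_LOONGARCH_PTW
      printf ("PTW:  %d\n", (hwcap & HWCAP_LOONGARCH_PTW) != 0);
    #endif
      return 0;
    }

The new PTW bit is only reported here; the ifunc selectors in these patches key off LSX, LASX and UAL.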
_service:tar_scm:LoongArch-elf-Add-new-LoongArch-reloc-types-109-into.patch
Added
@@ -0,0 +1,30 @@ +From 4938840b15ff9734fdcc63cc0744ce3f3bbb0b16 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Mon, 14 Aug 2023 15:34:08 +0800 +Subject: PATCH 05/29 LoongArch: elf: Add new LoongArch reloc types 109 into + elf.h + +These reloc types are generated by GNU assembler >= 2.41 for relaxation +support. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + elf/elf.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/elf/elf.h b/elf/elf.h +index d623bdeb..9c51073f 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -4213,6 +4213,7 @@ enum + #define R_LARCH_SUB6 106 + #define R_LARCH_ADD_ULEB128 107 + #define R_LARCH_SUB_ULEB128 108 ++#define R_LARCH_64_PCREL 109 + + /* ARC specific declarations. */ + +-- +2.33.0 +
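The R_LARCH_* values are plain macros, so whether an installed set of headers already carries this change can be checked directly from <elf.h>; a small sketch (it only compiles against a glibc whose elf.h includes this patch and its 101-108 predecessor):

    #include <elf.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Relocation numbers emitted by GNU assembler >= 2.41 for relaxation.  */
      printf ("R_LARCH_ADD_ULEB128 = %d\n", R_LARCH_ADD_ULEB128);  /* 107 */
      printf ("R_LARCH_SUB_ULEB128 = %d\n", R_LARCH_SUB_ULEB128);  /* 108 */
      printf ("R_LARCH_64_PCREL    = %d\n", R_LARCH_64_PCREL);     /* 109 */
      return 0;
    }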
_service:tar_scm:Loongarch-Add-ifunc-support-and-add-different-versio.patch
Added
@@ -0,0 +1,528 @@ +From 43abd8772a143cd96688c081500397dd712e631b Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 8 Aug 2023 14:15:44 +0800 +Subject: PATCH 03/29 Loongarch: Add ifunc support and add different versions + of strlen + +strlen-lasx is implemeted by LASX simd instructions(256bit) +strlen-lsx is implemeted by LSX simd instructions(128bit) +strlen-align is implemented by LA basic instructions and never use unaligned memory acess + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 7 ++ + .../lp64/multiarch/ifunc-impl-list.c | 41 +++++++ + .../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +++++++ + .../loongarch/lp64/multiarch/strlen-aligned.S | 100 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strlen-lasx.S | 63 +++++++++++ + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 +++++++++++++ + sysdeps/loongarch/lp64/multiarch/strlen.c | 37 +++++++ + sysdeps/loongarch/sys/regdef.h | 57 ++++++++++ + .../unix/sysv/linux/loongarch/cpu-features.h | 2 + + 9 files changed, 418 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +new file mode 100644 +index 00000000..76c506c9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -0,0 +1,7 @@ ++ifeq ($(subdir),string) ++sysdep_routines += \ ++ strlen-aligned \ ++ strlen-lsx \ ++ strlen-lasx \ ++# sysdep_routines ++endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +new file mode 100644 +index 00000000..1a2a576f +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -0,0 +1,41 @@ ++/* Enumerate available IFUNC implementations of a function LoongArch64 version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++#include <assert.h> ++#include <string.h> ++#include <wchar.h> ++#include <ldsodefs.h> ++#include <ifunc-impl-list.h> ++#include <stdio.h> ++ ++size_t ++__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ++ size_t max) ++{ ++ ++ size_t i = max; ++ ++ IFUNC_IMPL (i, name, strlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) ++ ) ++ return i; ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +new file mode 100644 +index 00000000..6258bb76 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +@@ -0,0 +1,40 @@ ++/* Common definition for strlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +new file mode 100644 +index 00000000..e9e1d2fc +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -0,0 +1,100 @@ ++/* Optimized strlen implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRLEN __strlen_aligned ++#else ++# define STRLEN strlen ++#endif ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ li.w t0, -1 ++ ++ ld.d t2, a0, 0 ++ andi t1, a1, 0x7 ++ ori a2, a2, 0x101 ++ slli.d t1, t1, 3 ++ ++ bstrins.d a2, a2, 63, 32 ++ sll.d t1, t0, t1 ++ slli.d t3, a2, 7 ++ nor a3, zero, t3 ++ ++ orn t2, t2, t1 ++ sub.d t0, t2, a2 ++ nor t1, t2, a3 ++ and t0, t0, t1 ++ ++ ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++L(loop_16_7bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_16_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ nor t0, t2, a3 ++ and t0, t1, t0 ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_16_8bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ ++ bnez t0, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ beqz t0, L(loop_16_8bit) ++ addi.d a0, a0, -8 ++ ++L(count_pos): ++ ctz.d t1, t0 ++ sub.d a0, a0, a1 ++ srli.d t1, t1, 3 ++ add.d a0, a0, t1 ++ ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +new file mode 100644 +index 00000000..258c47ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -0,0 +1,63 @@ ++/* Optimized strlen implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lasx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ li.d t1, -1 ++ xvld xr0, a0, 0 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 # sign extend ++ ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ ++ ++ xvmsknz.b xr0, xr0 ++ sub.d a0, a0, a1 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +new file mode 100644 +index 00000000..b194355e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized strlen implementation using Loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lsx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t1, -1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ ++ jr ra ++ nop ++ nop ++ nop ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vmin.bu vr2, vr0, vr1 ++ ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ ++ vilvl.h vr0, vr1, vr0 ++ sub.d a0, a0, a1 ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c +new file mode 100644 +index 00000000..381c2daa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of strlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strlen __redirect_strlen ++# include <string.h> ++# undef strlen ++ ++# define SYMBOL_NAME strlen ++# include "ifunc-strlen.h" ++ ++libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h +index 5100f36d..524d2e32 100644 +--- a/sysdeps/loongarch/sys/regdef.h ++++ b/sysdeps/loongarch/sys/regdef.h +@@ -89,6 +89,14 @@ + #define fs5 $f29 + #define fs6 $f30 + #define fs7 $f31 ++#define fcc0 $fcc0 ++#define fcc1 $fcc1 ++#define fcc2 $fcc2 ++#define fcc3 $fcc3 ++#define fcc4 $fcc4 ++#define fcc5 $fcc5 ++#define fcc6 $fcc6 ++#define fcc7 $fcc7 + + #define vr0 $vr0 + #define vr1 $vr1 +@@ -98,6 +106,30 @@ + #define vr5 $vr5 + #define vr6 $vr6 + #define vr7 $vr7 ++#define vr8 $vr8 ++#define vr9 $vr9 ++#define vr10 $vr10 ++#define vr11 $vr11 ++#define vr12 $vr12 ++#define vr13 $vr13 ++#define vr14 $vr14 ++#define vr15 $vr15 ++#define vr16 $vr16 ++#define vr17 $vr17 ++#define vr18 $vr18 ++#define vr19 $vr19 ++#define vr20 $vr20 ++#define vr21 $vr21 ++#define vr22 $vr22 ++#define vr23 $vr23 ++#define vr24 $vr24 ++#define vr25 $vr25 ++#define vr26 $vr26 ++#define vr27 $vr27 ++#define vr28 $vr28 ++#define vr29 $vr29 ++#define vr30 $vr30 ++#define vr31 $vr31 + + #define xr0 $xr0 + #define xr1 $xr1 +@@ -107,5 +139,30 @@ + #define xr5 $xr5 + #define xr6 $xr6 + #define xr7 $xr7 ++#define xr7 $xr7 ++#define xr8 $xr8 ++#define xr9 $xr9 ++#define xr10 $xr10 ++#define xr11 $xr11 ++#define xr12 $xr12 ++#define xr13 $xr13 ++#define xr14 $xr14 ++#define xr15 $xr15 ++#define xr16 $xr16 ++#define xr17 $xr17 ++#define xr18 $xr18 ++#define xr19 $xr19 ++#define xr20 $xr20 ++#define xr21 $xr21 ++#define xr22 $xr22 ++#define xr23 $xr23 ++#define xr24 $xr24 ++#define xr25 $xr25 ++#define xr26 $xr26 ++#define xr27 $xr27 ++#define xr28 $xr28 ++#define xr29 $xr29 ++#define xr30 $xr30 ++#define xr31 $xr31 + + #endif /* _SYS_REGDEF_H */ +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index e371e13b..d1a280a5 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -25,5 +25,7 @@ + #define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) + #define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + ++#define INIT_ARCH() ++ + #endif /* _CPU_FEATURES_LOONGARCH64_H */ + +-- +2.33.0 +
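The glue above (ifunc-impl-list.c, the IFUNC_SELECTOR in ifunc-strlen.h, libc_ifunc_redirected) is glibc-internal, but the mechanism underneath is the ordinary STT_GNU_IFUNC resolver that GCC exposes as __attribute__ ((ifunc)). A standalone sketch of the same select-once-at-startup pattern, with hypothetical my_strlen* names, a trivial stand-in for the hwcap test, and the assumption of a toolchain and target with ifunc support (which these patches require in any case):

    #include <stdio.h>
    #include <string.h>

    typedef size_t strlen_fn (const char *);

    static size_t
    my_strlen_generic (const char *s)
    {
      size_t n = 0;
      while (s[n] != '\0')
        n++;
      return n;
    }

    /* Placeholder for an LSX/LASX-style optimized implementation.  */
    static size_t
    my_strlen_fast (const char *s)
    {
      return my_strlen_generic (s);
    }

    /* Resolver: runs once, at relocation time, like IFUNC_SELECTOR above.
       A real selector would test hwcap bits here.  */
    static strlen_fn *
    my_strlen_resolver (void)
    {
      int have_simd = 0;            /* stand-in for SUPPORT_LSX/SUPPORT_LASX */
      return have_simd ? my_strlen_fast : my_strlen_generic;
    }

    size_t my_strlen (const char *) __attribute__ ((ifunc ("my_strlen_resolver")));

    int
    main (void)
    {
      printf ("%zu\n", my_strlen ("LoongArch"));
      return 0;
    }

This bind-once behavior is what lets a single libc.so.6 ship aligned, LSX and LASX variants of each routine and pick among them per machine without per-call overhead.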
_service:tar_scm:Loongarch-Add-ifunc-support-for-memcpy-aligned-unali.patch
Added
@@ -0,0 +1,2570 @@ +From 9c522272146423c1ef9fb9e071737a8ad26e844e Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 15 Aug 2023 09:11:53 +0800 +Subject: PATCH 07/29 Loongarch: Add ifunc support for memcpy{aligned, + unaligned, lsx, lasx} and memmove{aligned, unaligned, lsx, lasx} + +These implementations improve the time to copy data in the glibc +microbenchmark as below: +memcpy-lasx reduces the runtime about 8%-76% +memcpy-lsx reduces the runtime about 8%-72% +memcpy-unaligned reduces the runtime of unaligned data copying up to 40% +memcpy-aligned reduece the runtime of unaligned data copying up to 25% +memmove-lasx reduces the runtime about 20%-73% +memmove-lsx reduces the runtime about 50% +memmove-unaligned reduces the runtime of unaligned data moving up to 40% +memmove-aligned reduces the runtime of unaligned data moving up to 25% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 5 + + .../lp64/multiarch/ifunc-impl-list.c | 19 + + sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h | 45 + + .../loongarch/lp64/multiarch/memcpy-aligned.S | 783 ++++++++++++++++++ + .../loongarch/lp64/multiarch/memcpy-lasx.S | 20 + + sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 20 + + .../lp64/multiarch/memcpy-unaligned.S | 247 ++++++ + sysdeps/loongarch/lp64/multiarch/memcpy.c | 37 + + .../lp64/multiarch/memmove-aligned.S | 20 + + .../loongarch/lp64/multiarch/memmove-lasx.S | 287 +++++++ + .../loongarch/lp64/multiarch/memmove-lsx.S | 534 ++++++++++++ + .../lp64/multiarch/memmove-unaligned.S | 380 +++++++++ + sysdeps/loongarch/lp64/multiarch/memmove.c | 38 + + 13 files changed, 2435 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 110a8c5c..afa51041 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -9,5 +9,10 @@ sysdep_routines += \ + strchrnul-aligned \ + strchrnul-lsx \ + strchrnul-lasx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-unaligned \ ++ memmove-lsx \ ++ memmove-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index c7164b45..25eb96b0 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -53,5 +53,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) + ) ++ ++ IFUNC_IMPL (i, name, memcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, 
memcpy, SUPPORT_LSX, __memcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memmove, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx) ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ++ ) ++ + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +new file mode 100644 +index 00000000..3be67da6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +@@ -0,0 +1,45 @@ ++/* Common definition for ifunc selection implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (unaligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ if (SUPPORT_UAL) ++ return OPTIMIZE (unaligned); ++ else ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +new file mode 100644 +index 00000000..299dd49c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -0,0 +1,783 @@ ++/* Optimized memcpy_aligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define MEMCPY_NAME __memcpy_aligned ++# define MEMMOVE_NAME __memmove_aligned ++#else ++# define MEMCPY_NAME memcpy ++# define MEMMOVE_NAME memmove ++#endif ++ ++#define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++#define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 6) ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++END(MEMMOVE_NAME) ++ ++LEAF_NO_ALIGN(MEMCPY_NAME) ++ srai.d a3, a2, 4 ++ beqz a3, L(short_data) ++ ++ move a4, a0 ++ andi a5, a0, 0x7 ++ andi a6, a1, 0x7 ++ li.d t8, 8 ++ beqz a5, L(check_align) ++ ++ sub.d t2, t8, a5 ++ sub.d a2, a2, t2 ++ pcaddi t1, 20 ++ slli.d t3, t2, 3 ++ ++ add.d a1, a1, t2 ++ sub.d t1, t1, t3 ++ add.d a4, a4, t2 ++ jr t1 ++ ++L(al7): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(al6): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(al5): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(al4): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(al3): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(al2): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(al1): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(check_align): ++ bne a5, a6, L(unalign) ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ andi a3, a2, 0x3f ++ ++ beq a3, a2, L(al_less_64bytes) ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a5, a1, t0 ++ ++L(loop_64bytes): ++ LD_64(a1, 0) ++ addi.d a1, a1, 64 ++ ST_64(a4, 0) ++ ++ addi.d a4, a4, 64 ++ bne a1, a5, L(loop_64bytes) ++ ++L(al_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(al_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(al_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(al_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(al_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ st.d t0, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(al_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(al_less_4bytes) ++ ++ ld.w t0, a1, 0 ++ addi.d a1, a1, 4 ++ addi.d a2, a2, -4 ++ st.w t0, a4, 0 ++ addi.d a4, a4, 4 ++ ++L(al_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(al_less_2bytes) ++ ++ ld.h t0, a1, 0 ++ addi.d a1, a1, 2 ++ addi.d a2, a2, -2 ++ st.h t0, a4, 0 ++ addi.d a4, a4, 2 ++ ++L(al_less_2bytes): ++ beqz a2, L(al_less_1byte) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(al_less_1byte): ++ jr ra ++ ++L(unalign): ++ andi a5, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ sub.d t8, t8, a5 ++ slli.d a5, a5, 3 ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ slli.d a6, t8, 3 ++ srl.d a7, t0, a5 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ andi a3, a2, 0x3f ++ beq a3, a2, L(un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a3, a1, t0 ++ ++L(un_long_bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, 
t2, a5 ++ sll.d t2, t2, a6 ++ srl.d t7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t0, a7, t0 ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ ld.d t4, a1, 32 ++ ld.d t5, a1, 40 ++ ld.d t6, a1, 48 ++ ld.d a7, a1, 56 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a1, a1, 64 ++ ++ srl.d t0, t4, a5 ++ sll.d t4, t4, a6 ++ srl.d t1, t5, a5 ++ sll.d t5, t5, a6 ++ ++ srl.d t2, t6, a5 ++ sll.d t6, t6, a6 ++ sll.d t3, a7, a6 ++ srl.d a7, a7, a5 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, 32 ++ st.d t5, a4, 40 ++ st.d t6, a4, 48 ++ st.d t3, a4, 56 ++ ++ addi.d a4, a4, 64 ++ bne a3, a1, L(un_long_bytes) ++ ++L(un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(un_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ or t0, a7, t0 ++ srl.d a7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ srl.d t2, t0, a5 ++ sll.d t3, t0, a6 ++ sll.d t4, t1, a6 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ ++ srl.d a7, t1, a5 ++ st.d t3, a4, 0 ++ st.d t4, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(un_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ sll.d t1, t0, a6 ++ ++ or t2, a7, t1 ++ srl.d a7, t0, a5 ++ st.d t2, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(un_less_8bytes): ++ beqz a2, L(un_less_1byte) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, 0 ++ sll.d t0, t0, a6 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(un_less_4bytes) ++ ++ addi.d a2, a2, -4 ++ st.w a7, a4, 0 ++ addi.d a4, a4, 4 ++ srai.d a7, a7, 32 ++ ++L(un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(un_less_2bytes) ++ ++ addi.d a2, a2, -2 ++ st.h a7, a4, 0 ++ addi.d a4, a4, 2 ++ srai.d a7, a7, 16 ++ ++L(un_less_2bytes): ++ beqz a2, L(un_less_1byte) ++ st.b a7, a4, 0 ++ ++L(un_less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 36 ++ slli.d t2, a2, 3 ++ add.d a4, a0, a2 ++ sub.d t1, t1, t2 ++ add.d a1, a1, a2 ++ jr t1 ++ ++L(short_15_bytes): ++ ld.b t0, a1, -15 ++ st.b t0, a4, -15 ++L(short_14_bytes): ++ ld.b t0, a1, -14 ++ st.b t0, a4, -14 ++L(short_13_bytes): ++ ld.b t0, a1, -13 ++ st.b t0, a4, -13 ++L(short_12_bytes): ++ ld.b t0, a1, -12 ++ st.b t0, a4, -12 ++L(short_11_bytes): ++ ld.b t0, a1, -11 ++ st.b t0, a4, -11 ++L(short_10_bytes): ++ ld.b t0, a1, -10 ++ st.b t0, a4, -10 ++L(short_9_bytes): ++ ld.b t0, a1, -9 ++ st.b t0, a4, -9 ++L(short_8_bytes): ++ ld.b t0, a1, -8 ++ st.b t0, a4, -8 ++L(short_7_bytes): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(short_6_bytes): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(short_5_bytes): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(short_4_bytes): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(short_3_bytes): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(short_2_bytes): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(short_1_bytes): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ jr ra ++ ++L(copy_back): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_short_data) ++ ++ add.d a4, a0, a2 ++ add.d a1, a1, a2 ++ ++ andi a5, a4, 0x7 ++ andi a6, a1, 0x7 ++ beqz a5, L(back_check_align) ++ ++ sub.d 
a2, a2, a5 ++ sub.d a1, a1, a5 ++ sub.d a4, a4, a5 ++ ++ pcaddi t1, 18 ++ slli.d t3, a5, 3 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a4, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a4, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a4, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a4, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a4, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a4, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(back_check_align): ++ bne a5, a6, L(back_unalign) ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a5, a1, t0 ++ ++L(back_loop_64bytes): ++ LD_64(a1, -64) ++ addi.d a1, a1, -64 ++ ST_64(a4, -64) ++ ++ addi.d a4, a4, -64 ++ bne a1, a5, L(back_loop_64bytes) ++ ++L(back_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_less_32bytes) ++ ++ ld.d t0, a1, -32 ++ ld.d t1, a1, -24 ++ ld.d t2, a1, -16 ++ ld.d t3, a1, -8 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, -32 ++ st.d t1, a4, -24 ++ st.d t2, a4, -16 ++ st.d t3, a4, -8 ++ ++ addi.d a4, a4, -32 ++ ++L(back_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ ld.d t0, a1, -16 ++ ld.d t1, a1, -8 ++ ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ ++ st.d t0, a4, -16 ++ st.d t1, a4, -8 ++ addi.d a4, a4, -16 ++ ++L(back_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ addi.d a2, a2, -8 ++ addi.d a1, a1, -8 ++ ++ st.d t0, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(back_less_4bytes) ++ ++ ld.w t0, a1, -4 ++ addi.d a2, a2, -4 ++ addi.d a1, a1, -4 ++ ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ ++L(back_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_less_2bytes) ++ ++ ld.h t0, a1, -2 ++ addi.d a2, a2, -2 ++ addi.d a1, a1, -2 ++ ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ ++L(back_less_2bytes): ++ beqz a2, L(back_less_1byte) ++ ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(back_less_1byte): ++ jr ra ++ ++L(back_unalign): ++ andi t8, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ ++ sub.d a6, zero, t8 ++ ++ ld.d t0, a1, 0 ++ slli.d a6, a6, 3 ++ slli.d a5, t8, 3 ++ sll.d a7, t0, a6 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a3, a1, t0 ++ ++L(back_un_long_bytes): ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ sll.d t7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t0, t0, a7 ++ or t1, t1, t4 ++ or t2, t2, t5 ++ or t3, t3, t6 ++ ++ ld.d t4, a1, -40 ++ ld.d t5, a1, -48 ++ ld.d t6, a1, -56 ++ ld.d a7, a1, -64 ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a1, a1, -64 ++ ++ sll.d t0, t4, a6 ++ srl.d t4, t4, a5 ++ ++ sll.d t1, t5, a6 ++ srl.d t5, t5, a5 ++ ++ sll.d t2, t6, a6 ++ srl.d t6, t6, a5 ++ ++ srl.d t3, a7, a5 ++ sll.d a7, a7, a6 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, -40 ++ st.d t5, a4, -48 ++ st.d t6, a4, -56 ++ st.d t3, a4, -64 ++ ++ addi.d a4, a4, -64 ++ bne a3, a1, L(back_un_long_bytes) ++ ++L(back_un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_un_less_32bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ 
srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ or t0, a7, t0 ++ ++ sll.d a7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a4, a4, -32 ++ ++L(back_un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ++ addi.d a1, a1, -16 ++ addi.d a2, a2, -16 ++ ++ sll.d t2, t0, a6 ++ srl.d t3, t0, a5 ++ ++ srl.d t4, t1, a5 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ sll.d a7, t1, a6 ++ ++ st.d t3, a4, -8 ++ st.d t4, a4, -16 ++ ++ addi.d a4, a4, -16 ++ ++L(back_un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_un_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ ++ addi.d a1, a1, -8 ++ addi.d a2, a2, -8 ++ ++ srl.d t1, t0, a5 ++ or t2, a7, t1 ++ sll.d a7, t0, a6 ++ ++ st.d t2, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_un_less_8bytes): ++ beqz a2, L(back_end) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, -8 ++ srl.d t0, t0, a5 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(back_un_less_4bytes) ++ ++ srai.d t0, a7, 32 ++ addi.d a2, a2, -4 ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ slli.d a7, a7, 32 ++ ++L(back_un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_un_less_2bytes) ++ srai.d t0, a7, 48 ++ addi.d a2, a2, -2 ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ slli.d a7, a7, 16 ++L(back_un_less_2bytes): ++ beqz a2, L(back_un_less_1byte) ++ srai.d t0, a7, 56 ++ st.b t0, a4, -1 ++L(back_un_less_1byte): ++ jr ra ++ ++L(back_short_data): ++ pcaddi t1, 34 ++ slli.d t2, a2, 3 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.b t0, a1, 14 ++ st.b t0, a0, 14 ++ ld.b t0, a1, 13 ++ st.b t0, a0, 13 ++ ld.b t0, a1, 12 ++ st.b t0, a0, 12 ++ ld.b t0, a1, 11 ++ st.b t0, a0, 11 ++ ld.b t0, a1, 10 ++ st.b t0, a0, 10 ++ ld.b t0, a1, 9 ++ st.b t0, a0, 9 ++ ld.b t0, a1, 8 ++ st.b t0, a0, 8 ++ ld.b t0, a1, 7 ++ st.b t0, a0, 7 ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(back_end): ++ jr ra ++ ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++libc_hidden_builtin_def (MEMCPY_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +new file mode 100644 +index 00000000..4aae5bf8 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using Loongarch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +new file mode 100644 +index 00000000..6ebbe7a2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +new file mode 100644 +index 00000000..8e60a22d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -0,0 +1,247 @@ ++/* Optimized unaligned memcpy implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMCPY_NAME __memcpy_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMCPY_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ li.w a6, 16 ++ bge a6, a2, L(less_16bytes) ++ ++ li.w a6, 128 ++ blt a6, a2, L(long_bytes) ++ li.w a6, 64 ++ blt a6, a2, L(more_64bytes) ++ ++ li.w a6, 32 ++ blt a6, a2, L(more_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ jr ra ++ ++L(more_64bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ addi.d t8, t8, 0x8 ++ sub.d a7, a0, t8 ++ ++ ld.d t0, a1, 0 ++ sub.d a1, a1, a7 ++ st.d t0, a0, 0 ++ add.d a7, a7, a2 ++ addi.d a7, a7, -0x20 ++ ++L(loop_32): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ st.d t0, t8, 0 ++ st.d t1, t8, 8 ++ st.d t2, t8, 16 ++ st.d t3, t8, 24 ++ ++ addi.d t8, t8, 0x20 ++ addi.d a1, a1, 0x20 ++ addi.d a7, a7, -0x20 ++ blt zero, a7, L(loop_32) ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(more_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ beq a0, t8, L(start) ++ ld.d t0, a1, 0 ++ ++ addi.d t8, t8, 0x8 ++ st.d t0, a0, 0 ++ sub.d a7, a0, t8 ++ sub.d a1, a1, a7 ++ ++L(start): ++ addi.d a5, a3, -0x80 ++ blt a5, t8, L(align_end_proc) ++ ++L(loop_128): ++ LD_64(a1, 0) ++ ST_64(t8, 0) ++ LD_64(a1, 64) ++ addi.d a1, a1, 0x80 ++ ST_64(t8, 64) ++ addi.d t8, t8, 0x80 ++ bge a5, t8, L(loop_128) ++ ++L(align_end_proc): ++ sub.d a2, a3, t8 ++ pcaddi t1, 34 ++ andi t2, a2, 0x78 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, 112 ++ st.d t0, t8, 112 ++ ld.d t0, a1, 104 ++ st.d t0, t8, 104 ++ ld.d t0, a1, 96 ++ st.d t0, t8, 96 ++ ld.d t0, a1, 88 ++ st.d t0, t8, 88 ++ ld.d t0, a1, 80 ++ st.d t0, t8, 80 ++ ld.d t0, a1, 72 ++ st.d t0, t8, 72 ++ ld.d t0, a1, 64 ++ st.d t0, t8, 64 ++ ld.d t0, a1, 56 ++ st.d t0, t8, 56 ++ ld.d t0, a1, 48 ++ st.d t0, t8, 48 ++ ld.d t0, a1, 40 ++ st.d t0, t8, 40 ++ 
ld.d t0, a1, 32 ++ st.d t0, t8, 32 ++ ld.d t0, a1, 24 ++ st.d t0, t8, 24 ++ ld.d t0, a1, 16 ++ st.d t0, t8, 16 ++ ld.d t0, a1, 8 ++ st.d t0, t8, 8 ++ ld.d t0, a1, 0 ++ st.d t0, t8, 0 ++ ld.d t0, a4, -8 ++ st.d t0, a3, -8 ++ ++ jr ra ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy.c b/sysdeps/loongarch/lp64/multiarch/memcpy.c +new file mode 100644 +index 00000000..93b238ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memcpy __redirect_memcpy ++# include <string.h> ++# undef memcpy ++ ++# define SYMBOL_NAME memcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memcpy, memcpy, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +new file mode 100644 +index 00000000..5354f383 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S +@@ -0,0 +1,20 @@ ++/* Optimized memmove_aligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* memmove_aligned is part of memcpy_aligned, see memcpy-aligned.S. */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +new file mode 100644 +index 00000000..ff68e7a2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S +@@ -0,0 +1,287 @@ ++/* Optimized memmove implementation using Loongarch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMCPY_NAME ++# define MEMCPY_NAME __memcpy_lasx ++#endif ++ ++#ifndef MEMMOVE_NAME ++# define MEMMOVE_NAME __memmove_lasx ++#endif ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(copy_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ ++ beqz t0, L(less_16bytes) ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ ++ ++ vst vr1, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ld.w t0, a1, 0 ++ ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(move_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(move_long): ++ sub.d t2, a0, a1 ++ ++ bltu t2, a2, L(copy_back) ++L(copy_long): ++ andi t2, a0, 0x1f ++ addi.d a2, a2, -1 ++ sub.d t2, t0, t2 ++ ++ ++ xvld xr8, a1, 0 ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ add.d a5, a0, t2 ++ ++ andi a2, t3, 0xff ++ add.d a1, a1, t2 ++ beq a2, t3, L(lt256) ++ sub.d a6, a4, a2 ++ ++ addi.d a6, a6, -1 ++L(loop_256): ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ xvld xr2, a1, 64 ++ ++ xvld xr3, a1, 96 ++ xvld xr4, a1, 128 ++ xvld xr5, a1, 160 ++ xvld xr6, a1, 192 ++ ++ ++ xvld xr7, a1, 224 ++ addi.d a1, a1, 256 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ xvst xr4, a5, 128 ++ xvst xr5, a5, 160 ++ ++ xvst xr6, a5, 192 ++ xvst xr7, a5, 224 ++ addi.d a5, a5, 256 ++ bne a1, a6, L(loop_256) ++ ++L(lt256): ++ srli.d t2, a2, 7 ++ beqz t2, L(lt128) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ ++ xvld xr2, a1, 64 ++ xvld xr3, a1, 96 ++ addi.d a1, a1, 128 ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ ++ addi.d a5, a5, 128 ++L(lt128): ++ bltu a2, t1, L(lt64) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ addi.d a1, a1, 64 ++ addi.d a2, a2, -64 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ ++ addi.d a5, a5, 64 ++L(lt64): ++ bltu a2, t0, L(lt32) ++ xvld xr0, a1, 0 ++ xvst xr0, a5, 0 
++ ++L(lt32): ++ xvst xr8, a0, 0 ++ xvst xr9, a3, -32 ++ jr ra ++ nop ++ ++L(copy_back): ++ addi.d a3, a3, -1 ++ addi.d a2, a2, -2 ++ andi t2, a3, 0x1f ++ xvld xr8, a1, 0 ++ ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ sub.d a5, a3, t2 ++ sub.d a4, a4, t2 ++ ++ ++ andi a2, t3, 0xff ++ beq a2, t3, L(back_lt256) ++ add.d a6, a1, a2 ++ addi.d a6, a6, 2 ++ ++L(back_loop_256): ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ xvld xr3, a4, -129 ++ ++ xvld xr4, a4, -161 ++ xvld xr5, a4, -193 ++ xvld xr6, a4, -225 ++ xvld xr7, a4, -257 ++ ++ addi.d a4, a4, -256 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ ++ ++ xvst xr3, a5, -128 ++ xvst xr4, a5, -160 ++ xvst xr5, a5, -192 ++ xvst xr6, a5, -224 ++ ++ xvst xr7, a5, -256 ++ addi.d a5, a5, -256 ++ bne a4, a6, L(back_loop_256) ++L(back_lt256): ++ srli.d t2, a2, 7 ++ ++ beqz t2, L(back_lt128) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ ++ xvld xr3, a4, -129 ++ addi.d a2, a2, -128 ++ addi.d a4, a4, -128 ++ xvst xr0, a5, -32 ++ ++ ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ xvst xr3, a5, -128 ++ addi.d a5, a5, -128 ++ ++L(back_lt128): ++ blt a2, t1, L(back_lt64) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ addi.d a2, a2, -64 ++ ++ addi.d a4, a4, -64 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ addi.d a5, a5, -64 ++ ++L(back_lt64): ++ bltu a2, t0, L(back_lt32) ++ xvld xr0, a4, -33 ++ xvst xr0, a5, -32 ++L(back_lt32): ++ xvst xr8, a0, 0 ++ ++ ++ xvst xr9, a3, -31 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +new file mode 100644 +index 00000000..9e1502a7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -0,0 +1,534 @@ ++/* Optimized memmove implementation using Loongarch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCPY_NAME __memcpy_lsx ++# define MEMMOVE_NAME __memmove_lsx ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(copy_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++L(more_32bytes): ++ vld vr0, a1, 0 ++ vld vr1, a1, 16 ++ vld vr2, a4, -32 ++ ++ ++ vld vr3, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a0, 16 ++ vst vr2, a3, -32 ++ ++ vst vr3, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vldrepl.d vr0, a1, 0 ++ vldrepl.d vr1, a4, -8 ++ vstelm.d vr0, a0, 0, 0 ++ vstelm.d vr1, a3, -8, 0 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ vldrepl.w vr0, a1, 0 ++ ++ ++ vldrepl.w vr1, a4, -4 ++ vstelm.w vr0, a0, 0, 0 ++ vstelm.w vr1, a3, -4, 0 ++ jr ra ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vldrepl.h vr0, a1, 0 ++ vldrepl.h vr1, a4, -2 ++ ++ vstelm.h vr0, a0, 0, 0 ++ vstelm.h vr1, a3, -2, 0 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++ nop ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(move_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++ nop ++L(move_long): ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++ ++ ++L(copy_long): ++ vld vr2, a1, 0 ++ andi t0, a0, 0xf ++ sub.d t0, t6, t0 ++ add.d a1, a1, t0 ++ ++ sub.d a2, a2, t0 ++ andi t1, a1, 0xf ++ bnez t1, L(unaligned) ++ vld vr0, a1, 0 ++ ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ andi t2, a2, 0x7f ++ add.d a5, a0, t0 ++ ++ beq a2, t2, L(al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++ ++L(al_loop): ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ vld vr4, a1, 64 ++ ++ vld vr5, a1, 80 ++ vld vr6, a1, 96 ++ vld vr7, a1, 112 ++ vst vr0, a5, 0 ++ ++ vld vr0, a1, 128 ++ addi.d a1, a1, 128 ++ vst vr1, a5, 16 ++ vst vr2, a5, 32 ++ ++ vst vr3, a5, 48 ++ vst vr4, a5, 64 ++ vst vr5, a5, 80 ++ vst vr6, a5, 96 ++ ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(al_loop) ++L(al_less_128): ++ blt a2, t8, L(al_less_64) ++ ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ addi.d a2, a2, -64 ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 64 ++ addi.d a1, a1, 64 ++ vst vr1, a5, 16 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(al_less_64): ++ blt a2, t7, L(al_less_32) ++ ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -32 ++ vst vr0, a5, 0 ++ vld vr0, a1, 32 ++ ++ addi.d a1, a1, 32 ++ vst vr1, a5, 16 ++ addi.d a5, a5, 32 ++L(al_less_32): ++ blt a2, t6, L(al_less_16) ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 16 ++ addi.d a5, a5, 16 ++L(al_less_16): ++ vld vr1, a4, -16 ++ ++ vst vr0, a5, 0 ++ vst vr1, a3, -16 ++ jr ra ++ nop ++ ++ ++L(magic_num): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++L(unaligned): ++ pcaddi t2, -4 ++ bstrins.d a1, zero, 3, 0 ++ vld vr8, t2, 0 ++ vld vr0, a1, 0 ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ add.d a5, a0, t0 ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a1, a1, 32 ++ ++ ++ beq 
t2, a2, L(un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++L(un_loop): ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vld vr4, a1, 32 ++ vld vr5, a1, 48 ++ ++ vld vr6, a1, 64 ++ vld vr7, a1, 80 ++ vshuf.b vr8, vr1, vr0, vr9 ++ vld vr0, a1, 96 ++ ++ vst vr8, a5, 0 ++ vshuf.b vr8, vr2, vr1, vr9 ++ vld vr1, a1, 112 ++ vst vr8, a5, 16 ++ ++ ++ addi.d a1, a1, 128 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr4, vr3, vr9 ++ vst vr2, a5, 32 ++ ++ vshuf.b vr4, vr5, vr4, vr9 ++ vst vr3, a5, 48 ++ vshuf.b vr5, vr6, vr5, vr9 ++ vst vr4, a5, 64 ++ ++ vshuf.b vr6, vr7, vr6, vr9 ++ vst vr5, a5, 80 ++ vshuf.b vr7, vr0, vr7, vr9 ++ vst vr6, a5, 96 ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(un_loop) ++L(un_less_128): ++ blt a2, t8, L(un_less_64) ++ ++ ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vshuf.b vr4, vr1, vr0, vr9 ++ vld vr0, a1, 32 ++ ++ vst vr4, a5, 0 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr2, vr1, vr9 ++ vld vr1, a1, 48 ++ ++ addi.d a1, a1, 64 ++ vst vr4, a5, 16 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr0, vr3, vr9 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(un_less_64): ++ blt a2, t7, L(un_less_32) ++ ++ ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr0, a1, 0 ++ vst vr3, a5, 0 ++ addi.d a2, a2, -32 ++ ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr1, a1, 16 ++ addi.d a1, a1, 32 ++ vst vr3, a5, 16 ++ ++ addi.d a5, a5, 32 ++L(un_less_32): ++ blt a2, t6, L(un_less_16) ++ vshuf.b vr2, vr1, vr0, vr9 ++ vor.v vr0, vr1, vr1 ++ ++ vld vr1, a1, 0 ++ vst vr2, a5, 0 ++ addi.d a5, a5, 16 ++L(un_less_16): ++ vld vr2, a4, -16 ++ ++ ++ vshuf.b vr0, vr1, vr0, vr9 ++ vst vr0, a5, 0 ++ vst vr2, a3, -16 ++ jr ra ++ ++L(copy_back): ++ addi.d t0, a3, -1 ++ vld vr2, a4, -16 ++ andi t0, t0, 0xf ++ addi.d t0, t0, 1 ++ ++ sub.d a4, a4, t0 ++ sub.d a2, a2, t0 ++ andi t1, a4, 0xf ++ bnez t1, L(back_unaligned) ++ ++ vld vr0, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ andi t2, a2, 0x7f ++ ++ ++ sub.d a3, a3, t0 ++ beq t2, a2, L(back_al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ ++ sub.d a6, a4, t3 ++L(back_al_loop): ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ vld vr4, a4, -80 ++ vld vr5, a4, -96 ++ vld vr6, a4, -112 ++ vld vr7, a4, -128 ++ ++ vst vr0, a3, -16 ++ vld vr0, a4, -144 ++ addi.d a4, a4, -128 ++ vst vr1, a3, -32 ++ ++ ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ vst vr4, a3, -80 ++ vst vr5, a3, -96 ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_al_loop) ++ ++L(back_al_less_128): ++ blt a2, t8, L(back_al_less_64) ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ addi.d a2, a2, -64 ++ vst vr0, a3, -16 ++ vld vr0, a4, -80 ++ addi.d a4, a4, -64 ++ ++ ++ vst vr1, a3, -32 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_al_less_64): ++ blt a2, t7, L(back_al_less_32) ++ vld vr1, a4, -32 ++ addi.d a2, a2, -32 ++ vst vr0, a3, -16 ++ ++ vld vr0, a4, -48 ++ vst vr1, a3, -32 ++ addi.d a3, a3, -32 ++ addi.d a4, a4, -32 ++ ++L(back_al_less_32): ++ blt a2, t6, L(back_al_less_16) ++ vst vr0, a3, -16 ++ vld vr0, a4, -32 ++ addi.d a3, a3, -16 ++ ++ ++L(back_al_less_16): ++ vld vr1, a1, 0 ++ vst vr0, a3, -16 ++ vst vr1, a0, 0 ++ jr ra ++ ++L(magic_num_2): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++L(back_unaligned): ++ pcaddi t2, -4 ++ bstrins.d a4, zero, 3, 0 ++ vld vr8, t2, 0 ++ vld vr0, a4, 0 ++ ++ vld vr1, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ sub.d a3, a3, t0 ++ ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 
++ addi.d a4, a4, -16 ++ ++ beq t2, a2, L(back_un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ sub.d a6, a4, t3 ++ ++L(back_un_loop): ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vld vr4, a4, -48 ++ ++ vld vr5, a4, -64 ++ vld vr6, a4, -80 ++ vld vr7, a4, -96 ++ vshuf.b vr8, vr0, vr1, vr9 ++ ++ ++ vld vr0, a4, -112 ++ vst vr8, a3, -16 ++ vshuf.b vr8, vr1, vr2, vr9 ++ vld vr1, a4, -128 ++ ++ vst vr8, a3, -32 ++ addi.d a4, a4, -128 ++ vshuf.b vr2, vr2, vr3, vr9 ++ vshuf.b vr3, vr3, vr4, vr9 ++ ++ vst vr2, a3, -48 ++ vshuf.b vr4, vr4, vr5, vr9 ++ vst vr3, a3, -64 ++ vshuf.b vr5, vr5, vr6, vr9 ++ ++ vst vr4, a3, -80 ++ vshuf.b vr6, vr6, vr7, vr9 ++ vst vr5, a3, -96 ++ vshuf.b vr7, vr7, vr0, vr9 ++ ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_un_loop) ++ ++L(back_un_less_128): ++ blt a2, t8, L(back_un_less_64) ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vshuf.b vr4, vr0, vr1, vr9 ++ ++ vld vr0, a4, -48 ++ vst vr4, a3, -16 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr1, vr2, vr9 ++ ++ vld vr1, a4, -64 ++ addi.d a4, a4, -64 ++ vst vr4, a3, -32 ++ vshuf.b vr2, vr2, vr3, vr9 ++ ++ ++ vshuf.b vr3, vr3, vr0, vr9 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_un_less_64): ++ blt a2, t7, L(back_un_less_32) ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr0, a4, -16 ++ vst vr3, a3, -16 ++ ++ addi.d a2, a2, -32 ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr1, a4, -32 ++ addi.d a4, a4, -32 ++ ++ vst vr3, a3, -32 ++ addi.d a3, a3, -32 ++L(back_un_less_32): ++ blt a2, t6, L(back_un_less_16) ++ vshuf.b vr2, vr0, vr1, vr9 ++ ++ ++ vor.v vr0, vr1, vr1 ++ vld vr1, a4, -16 ++ vst vr2, a3, -16 ++ addi.d a3, a3, -16 ++ ++L(back_un_less_16): ++ vld vr2, a1, 0 ++ vshuf.b vr0, vr0, vr1, vr9 ++ vst vr0, a3, -16 ++ vst vr2, a0, 0 ++ ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +new file mode 100644 +index 00000000..90a64b6b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -0,0 +1,380 @@ ++/* Optimized memmove_unaligned implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++ ++# define MEMMOVE_NAME __memmove_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ beq a1, a0, L(less_1bytes) ++ move t8, a0 ++ ++ srai.d a6, a2, 4 ++ beqz a6, L(less_16bytes) ++ srai.d a6, a2, 6 ++ bnez a6, L(more_64bytes) ++ srai.d a6, a2, 5 ++ beqz a6, L(less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ sub.d a7, a0, a1 ++ bltu a7, a2, L(copy_backward) ++ ++L(copy_forward): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ beq a0, t8, L(all_align) ++ addi.d a0, a0, 0x8 ++ sub.d a7, t8, a0 ++ sub.d a1, a1, a7 ++ add.d a2, a7, a2 ++ ++L(start_unalign_proc): ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a1, -7 ++ st.b t0, a0, -7 ++ ld.b t0, a1, -6 ++ st.b t0, a0, -6 ++ ld.b t0, a1, -5 ++ st.b t0, a0, -5 ++ ld.b t0, a1, -4 ++ st.b t0, a0, -4 ++ ld.b t0, a1, -3 ++ st.b t0, a0, -3 ++ ld.b t0, a1, -2 ++ st.b t0, a0, -2 ++ ld.b t0, a1, -1 ++ st.b t0, a0, -1 ++L(start_over): ++ ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ LD_64(a1, 0) ++ ST_64(a0, 0) ++ LD_64(a1, 64) ++ ST_64(a0, 64) ++ ++ addi.d a0, a0, 0x80 ++ addi.d a1, a1, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ add.d a1, a1, t2 ++ add.d a0, a0, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, -120 ++ st.d t0, a0, -120 ++ ld.d t0, a1, -112 ++ st.d t0, a0, -112 ++ ld.d t0, a1, -104 ++ st.d t0, a0, -104 ++ ld.d t0, a1, -96 ++ st.d t0, a0, -96 ++ ld.d t0, a1, -88 ++ st.d t0, a0, -88 ++ ld.d t0, a1, -80 ++ st.d t0, a0, -80 ++ ld.d t0, a1, -72 ++ st.d t0, a0, -72 ++ ld.d t0, a1, -64 ++ st.d t0, a0, -64 ++ ld.d t0, a1, -56 ++ st.d t0, a0, -56 ++ ld.d t0, a1, -48 ++ st.d t0, a0, -48 ++ ld.d t0, a1, -40 ++ st.d t0, a0, -40 ++ ld.d t0, a1, -32 ++ st.d t0, a0, -32 ++ ld.d t0, a1, -24 ++ st.d 
t0, a0, -24 ++ ld.d t0, a1, -16 ++ st.d t0, a0, -16 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a4, -7 ++ st.b t0, a3, -7 ++ ld.b t0, a4, -6 ++ st.b t0, a3, -6 ++ ld.b t0, a4, -5 ++ st.b t0, a3, -5 ++ ld.b t0, a4, -4 ++ st.b t0, a3, -4 ++ ld.b t0, a4, -3 ++ st.b t0, a3, -3 ++ ld.b t0, a4, -2 ++ st.b t0, a3, -2 ++ ld.b t0, a4, -1 ++ st.b t0, a3, -1 ++L(end): ++ move a0, t8 ++ jr ra ++ ++L(all_align): ++ addi.d a1, a1, 0x8 ++ addi.d a0, a0, 0x8 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ addi.d a2, a2, -8 ++ b L(start_over) ++ ++L(all_align_back): ++ addi.d a4, a4, -0x8 ++ addi.d a3, a3, -0x8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ addi.d a2, a2, -8 ++ b L(start_over_back) ++ ++L(copy_backward): ++ move a5, a3 ++ srli.d a3, a3, 3 ++ slli.d a3, a3, 3 ++ beq a3, a5, L(all_align_back) ++ sub.d a7, a3, a5 ++ add.d a4, a4, a7 ++ add.d a2, a7, a2 ++ ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a4, 6 ++ st.b t0, a3, 6 ++ ld.b t0, a4, 5 ++ st.b t0, a3, 5 ++ ld.b t0, a4, 4 ++ st.b t0, a3, 4 ++ ld.b t0, a4, 3 ++ st.b t0, a3, 3 ++ ld.b t0, a4, 2 ++ st.b t0, a3, 2 ++ ld.b t0, a4, 1 ++ st.b t0, a3, 1 ++ ld.b t0, a4, 0 ++ st.b t0, a3, 0 ++L(start_over_back): ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc_back) ++ ++L(loop_less_back): ++ LD_64(a4, -64) ++ ST_64(a3, -64) ++ LD_64(a4, -128) ++ ST_64(a3, -128) ++ ++ addi.d a4, a4, -0x80 ++ addi.d a3, a3, -0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less_back) ++ ++L(end_unalign_proc_back): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ sub.d a4, a4, t2 ++ sub.d a3, a3, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a4, 112 ++ st.d t0, a3, 112 ++ ld.d t0, a4, 104 ++ st.d t0, a3, 104 ++ ld.d t0, a4, 96 ++ st.d t0, a3, 96 ++ ld.d t0, a4, 88 ++ st.d t0, a3, 88 ++ ld.d t0, a4, 80 ++ st.d t0, a3, 80 ++ ld.d t0, a4, 72 ++ st.d t0, a3, 72 ++ ld.d t0, a4, 64 ++ st.d t0, a3, 64 ++ ld.d t0, a4, 56 ++ st.d t0, a3, 56 ++ ld.d t0, a4, 48 ++ st.d t0, a3, 48 ++ ld.d t0, a4, 40 ++ st.d t0, a3, 40 ++ ld.d t0, a4, 32 ++ st.d t0, a3, 32 ++ ld.d t0, a4, 24 ++ st.d t0, a3, 24 ++ ld.d t0, a4, 16 ++ st.d t0, a3, 16 ++ ld.d t0, a4, 8 ++ st.d t0, a3, 8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ move a0, t8 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove.c b/sysdeps/loongarch/lp64/multiarch/memmove.c +new file mode 100644 +index 00000000..7e3ca4c4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove.c +@@ -0,0 +1,38 @@ ++/* Multiple versions of memmove. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memmove __redirect_memmove ++# include <string.h> ++# undef memmove ++ ++# define SYMBOL_NAME memmove ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memmove, __libc_memmove, ++ IFUNC_SELECTOR ()); ++strong_alias (__libc_memmove, memmove); ++ ++# ifdef SHARED ++__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove) ++ __attribute__ ((visibility ("hidden"))); ++# endif ++ ++#endif +-- +2.33.0 +
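The memcpy and memmove wrappers above pick one of the __memcpy_/__memmove_{aligned, unaligned, lsx, lasx} variants at load time through glibc's internal libc_ifunc_redirected machinery and the selector in ifunc-lasx.h (not shown in this hunk). As a rough stand-alone sketch of the same selection idea — not glibc's actual code — the capability bits can be read from the auxiliary vector; the HWCAP_LOONGARCH_* macros are assumed to come from the Linux kernel's <asm/hwcap.h> on LoongArch, and the lasx -> lsx -> unaligned -> aligned ordering simply mirrors the variants this patch adds:

/* Sketch only: report which memcpy variant an ifunc-style selector
   would pick on the running machine.  */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>   /* assumed to define HWCAP_LOONGARCH_{UAL,LSX,LASX} */

static const char *
pick_memcpy (unsigned long hwcap)
{
  if (hwcap & HWCAP_LOONGARCH_LASX)
    return "__memcpy_lasx";
  if (hwcap & HWCAP_LOONGARCH_LSX)
    return "__memcpy_lsx";
  if (hwcap & HWCAP_LOONGARCH_UAL)
    return "__memcpy_unaligned";
  return "__memcpy_aligned";
}

int
main (void)
{
  printf ("selected variant: %s\n", pick_memcpy (getauxval (AT_HWCAP)));
  return 0;
}

The selector in glibc returns function pointers to the hidden variants instead of names, and it reads the hwcap value cached by the dynamic loader (GLRO (dl_hwcap)) rather than calling getauxval.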
View file
_service:tar_scm:Loongarch-Add-ifunc-support-for-strchr-aligned-lsx-l.patch
Added
@@ -0,0 +1,706 @@ +From aca7d7f0dde5f56344e8e58e5f6648c96bb1f1cc Mon Sep 17 00:00:00 2001 +From: dengjianbo <dengjianbo@loongson.cn> +Date: Tue, 15 Aug 2023 09:08:11 +0800 +Subject: PATCH 06/29 Loongarch: Add ifunc support for strchr{aligned, lsx, + lasx} and strchrnul{aligned, lsx, lasx} + +These implementations improve the time to run strchr{nul} +microbenchmark in glibc as below: +strchr-lasx reduces the runtime about 50%-83% +strchr-lsx reduces the runtime about 30%-67% +strchr-aligned reduces the runtime about 10%-20% +strchrnul-lasx reduces the runtime about 50%-83% +strchrnul-lsx reduces the runtime about 36%-65% +strchrnul-aligned reduces the runtime about 6%-10% + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/lp64/multiarch/Makefile | 6 ++ + .../lp64/multiarch/ifunc-impl-list.c | 16 +++ + .../loongarch/lp64/multiarch/ifunc-strchr.h | 41 ++++++++ + .../lp64/multiarch/ifunc-strchrnul.h | 41 ++++++++ + .../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++++++++++++++++++ + .../loongarch/lp64/multiarch/strchr-lasx.S | 91 +++++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++++++++++++++ + sysdeps/loongarch/lp64/multiarch/strchr.c | 36 +++++++ + .../lp64/multiarch/strchrnul-aligned.S | 95 ++++++++++++++++++ + .../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 +++++ + .../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 +++++ + sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 ++++++++ + 12 files changed, 581 insertions(+) + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index 76c506c9..110a8c5c 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -3,5 +3,11 @@ sysdep_routines += \ + strlen-aligned \ + strlen-lsx \ + strlen-lasx \ ++ strchr-aligned \ ++ strchr-lsx \ ++ strchr-lasx \ ++ strchrnul-aligned \ ++ strchrnul-lsx \ ++ strchrnul-lasx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +index 1a2a576f..c7164b45 100644 +--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #endif + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) + ) ++ ++ IFUNC_IMPL (i, name, strchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchrnul, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx) ++ 
IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) ++ ) + return i; + } +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +new file mode 100644 +index 00000000..4494db79 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +new file mode 100644 +index 00000000..8a925120 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchrnul ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <ldsodefs.h> ++#include <ifunc-init.h> ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +new file mode 100644 +index 00000000..5fb01806 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -0,0 +1,99 @@ ++/* Optimized strchr implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCHR_NAME __strchr_aligned ++#else ++# define STRCHR_NAME strchr ++#endif ++ ++LEAF(STRCHR_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ and t0, a5, a3 ++ and t1, a4, a3 ++ ++ ctz.d t0, t0 ++ ctz.d t1, t1 ++ srli.w t2, t0, 3 ++ sltu t3, t1, t0 ++ ++ ++ add.d a0, a0, t2 ++ masknez a0, a0, t3 ++ jr ra ++END(STRCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +new file mode 100644 +index 00000000..254402da +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -0,0 +1,91 @@ ++/* Optimized strchr implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lasx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ xvld xr0, a0, 0 ++ li.d t2, -1 ++ ++ xvreplgr2vr.b xr1, a1 ++ sll.d t1, t2, t1 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr3, xr0, 4 ++ vilvl.h vr0, vr3, vr0 ++ movfr2gr.s t0, fa0 ++ ++ orn t0, t0, t1 ++ bne t0, t2, L(end) ++ addi.d a0, a0, 32 ++ nop ++ ++ ++L(loop): ++ xvld xr0, a0, 0 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bcnez fcc0, L(loop_end) ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 64 ++ xvxor.v xr2, xr0, xr1 ++ ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ addi.d a0, a0, -32 ++ ++L(loop_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ++L(end): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplgr2vr.b vr0, t0 ++ xvpermi.q xr3, xr2, 1 ++ ++ vshuf.b vr0, vr3, vr2, vr0 ++ vpickve2gr.bu t0, vr0, 0 ++ masknez a0, a0, t0 ++#endif ++ jr ra ++ ++END(STRCHR) ++ ++libc_hidden_builtin_def(STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +new file mode 100644 +index 00000000..dae98b0a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -0,0 +1,73 @@ ++/* Optimized strlen implementation using loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lsx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0xf ++ bstrins.d a0, zero, 3, 0 ++ vld vr0, a0, 0 ++ li.d t2, -1 ++ ++ vreplgr2vr.b vr1, a1 ++ sll.d t3, t2, t1 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ext.w.h t0, t0 ++ orn t0, t0, t3 ++ ++ beq t0, t2, L(loop) ++L(found): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplve.b vr2, vr2, t0 ++ vpickve2gr.bu t1, vr2, 0 ++ masknez a0, a0, t1 ++#endif ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 16 ++ addi.d a0, a0, 16 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ++ b L(found) ++END(STRCHR) ++ ++libc_hidden_builtin_def (STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c +new file mode 100644 +index 00000000..404e97bd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strchr __redirect_strchr ++# include <string.h> ++# undef strchr ++ ++# define SYMBOL_NAME strchr ++# include "ifunc-strchr.h" ++ ++libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); ++weak_alias(strchr, index) ++# ifdef SHARED ++__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +new file mode 100644 +index 00000000..1c01a023 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized strchrnul implementation using basic Loongarch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <sysdep.h> ++#include <sys/regdef.h> ++#include <sys/asm.h> ++ ++#if IS_IN (libc) ++# define STRCHRNUL_NAME __strchrnul_aligned ++#else ++# define STRCHRNUL_NAME __strchrnul ++#endif ++ ++LEAF(STRCHRNUL_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ ctz.d t0, t0 ++ srli.w t0, t0, 3 ++ ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRCHRNUL_NAME) ++ ++libc_hidden_builtin_def (STRCHRNUL_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +new file mode 100644 +index 00000000..d45495e4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using loongarch LASX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STRCHR __strchrnul_lasx ++#define AS_STRCHRNUL ++#include "strchr-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +new file mode 100644 +index 00000000..07d793ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using loongarch LSX SIMD instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#define STRCHR __strchrnul_lsx ++#define AS_STRCHRNUL ++#include "strchr-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +new file mode 100644 +index 00000000..f3b8296e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strchrnul. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strchrnul __redirect_strchrnul ++# define __strchrnul __redirect___strchrnul ++# include <string.h> ++# undef __strchrnul ++# undef strchrnul ++ ++# define SYMBOL_NAME strchrnul ++# include "ifunc-strchrnul.h" ++ ++libc_ifunc_redirected (__redirect_strchrnul, __strchrnul, ++ IFUNC_SELECTOR ()); ++weak_alias (__strchrnul, strchrnul) ++# ifdef SHARED ++__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); ++# endif ++#endif +-- +2.33.0 +
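Both *-aligned variants above scan eight bytes per iteration with the classic word-at-a-time bit trick: a2 holds 0x0101010101010101, a3 holds the same value shifted left by 7 (0x8080808080808080), and the mul.d replicates the search character into every byte. A rough C rendering of the per-word tests performed in the loop (illustrative only, not the glibc code):

#include <stdint.h>

#define ONES  0x0101010101010101ULL   /* the constant built in a2 */
#define HIGHS 0x8080808080808080ULL   /* a3 = a2 << 7             */

/* Non-zero iff some byte of W is zero (the NUL test).  */
static inline uint64_t
has_zero_byte (uint64_t w)
{
  return (w - ONES) & ~w & HIGHS;
}

/* Non-zero iff some byte of W equals C; replicating C into every
   byte matches the mul.d of a1 by 0x0101...0101 in the assembly.  */
static inline uint64_t
has_char_byte (uint64_t w, unsigned char c)
{
  return has_zero_byte (w ^ (ONES * c));
}

The assembly keeps both masks, locates the lowest set 0x80 bit of each with ctz.d, and strchr returns NULL when the NUL comes before the match, while strchrnul simply returns the address of whichever comes first.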
View file
_service:tar_scm:Revert-LoongArch-Add-glibc.cpu.hwcap-support.patch
Added
@@ -0,0 +1,478 @@ +From c0f3b0a8c71c26d5351e8ddabe3e8a323803e683 Mon Sep 17 00:00:00 2001 +From: caiyinyu <caiyinyu@loongson.cn> +Date: Thu, 21 Sep 2023 09:10:11 +0800 +Subject: PATCH 26/29 Revert "LoongArch: Add glibc.cpu.hwcap support." + +This reverts commit a53451559dc9cce765ea5bcbb92c4007e058e92b. + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + sysdeps/loongarch/Makefile | 4 - + sysdeps/loongarch/Versions | 5 -- + sysdeps/loongarch/cpu-tunables.c | 89 ------------------- + sysdeps/loongarch/dl-get-cpu-features.c | 25 ------ + sysdeps/loongarch/dl-machine.h | 27 +----- + sysdeps/loongarch/dl-tunables.list | 25 ------ + .../unix/sysv/linux/loongarch/cpu-features.c | 29 ------ + .../unix/sysv/linux/loongarch/cpu-features.h | 18 +--- + .../unix/sysv/linux/loongarch/dl-procinfo.c | 60 ------------- + sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c | 21 ----- + .../unix/sysv/linux/loongarch/libc-start.c | 34 ------- + 11 files changed, 8 insertions(+), 329 deletions(-) + delete mode 100644 sysdeps/loongarch/Versions + delete mode 100644 sysdeps/loongarch/cpu-tunables.c + delete mode 100644 sysdeps/loongarch/dl-get-cpu-features.c + delete mode 100644 sysdeps/loongarch/dl-tunables.list + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c + delete mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c + +diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile +index 30a1f4a8..43d2f583 100644 +--- a/sysdeps/loongarch/Makefile ++++ b/sysdeps/loongarch/Makefile +@@ -6,10 +6,6 @@ ifeq ($(subdir),elf) + gen-as-const-headers += dl-link.sym + endif + +-ifeq ($(subdir),elf) +- sysdep-dl-routines += dl-get-cpu-features +-endif +- + # LoongArch's assembler also needs to know about PIC as it changes the + # definition of some assembler macros. + ASFLAGS-.os += $(pic-ccflag) +diff --git a/sysdeps/loongarch/Versions b/sysdeps/loongarch/Versions +deleted file mode 100644 +index 33ae2cc0..00000000 +--- a/sysdeps/loongarch/Versions ++++ /dev/null +@@ -1,5 +0,0 @@ +-ld { +- GLIBC_PRIVATE { +- _dl_larch_get_cpu_features; +- } +-} +diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c +deleted file mode 100644 +index 8e9fab93..00000000 +--- a/sysdeps/loongarch/cpu-tunables.c ++++ /dev/null +@@ -1,89 +0,0 @@ +-/* LoongArch CPU feature tuning. +- This file is part of the GNU C Library. +- Copyright (C) 2023 Free Software Foundation, Inc. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- <http://www.gnu.org/licenses/>. */ +- +-# include <stdbool.h> +-# include <stdint.h> +-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. 
*/ +-# include <elf/dl-tunables.h> +-# include <string.h> +-# include <cpu-features.h> +-# include <ldsodefs.h> +-# include <sys/auxv.h> +- +-# define HWCAP_LOONGARCH_IFUNC \ +- (HWCAP_LOONGARCH_UAL | HWCAP_LOONGARCH_LSX | HWCAP_LOONGARCH_LASX) +- +-# define CHECK_GLIBC_IFUNC_CPU_OFF(f, name, len) \ +- _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ +- if (!memcmp (f, #name, len) && \ +- (GLRO (dl_hwcap) & HWCAP_LOONGARCH_##name)) \ +- { \ +- hwcap |= (HWCAP_LOONGARCH_##name | (~HWCAP_LOONGARCH_IFUNC)); \ +- break; \ +- } \ +- +-attribute_hidden +-void +-TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) +-{ +- const char *p = valp->strval; +- size_t len; +- unsigned long hwcap = 0; +- const char *c; +- +- do { +- for (c = p; *c != ','; c++) +- if (*c == '\0') +- break; +- +- len = c - p; +- +- switch(len) +- { +- default: +- _dl_fatal_printf ( +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- break; +- case 3: +- { +- CHECK_GLIBC_IFUNC_CPU_OFF (p, LSX, 3); +- CHECK_GLIBC_IFUNC_CPU_OFF (p, UAL, 3); +- _dl_fatal_printf ( +- "Some features are invalid or not supported on this machine!!\n" +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- } +- break; +- case 4: +- { +- CHECK_GLIBC_IFUNC_CPU_OFF (p, LASX, 4); +- _dl_fatal_printf ( +- "Some features are invalid or not supported on this machine!!\n" +- "The valid values of glibc.cpu.hwcaps is UAL, LASX, LSX!!\n" +- ); +- } +- break; +- } +- +- p += len + 1; +- } +- while (*c != '\0'); +- +- GLRO (dl_larch_cpu_features).hwcap &= hwcap; +-} +diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c +deleted file mode 100644 +index 7cd9bc15..00000000 +--- a/sysdeps/loongarch/dl-get-cpu-features.c ++++ /dev/null +@@ -1,25 +0,0 @@ +-/* Define _dl_larch_get_cpu_features. +- Copyright (C) 2023 Free Software Foundation, Inc. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- <https://www.gnu.org/licenses/>. */ +- +- +-#include <ldsodefs.h> +- +-const struct cpu_features * +-_dl_larch_get_cpu_features (void) +-{ +- return &GLRO(dl_larch_cpu_features); +-} +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index b395a928..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -29,8 +29,6 @@ + #include <dl-static-tls.h> + #include <dl-machine-rel.h> + +-#include <cpu-features.c> +- + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ + ".globl\t" __STRING (entry) "\n\t" \ +@@ -55,23 +53,6 @@ + #define ELF_MACHINE_NO_REL 1 + #define ELF_MACHINE_NO_RELA 0 + +-#define DL_PLATFORM_INIT dl_platform_init () +- +-static inline void __attribute__ ((unused)) +-dl_platform_init (void) +-{ +- if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') +- /* Avoid an empty string which would disturb us. 
*/
+-    GLRO(dl_platform) = NULL;
+-
+-#ifdef SHARED
+- /* init_cpu_features has been called early from __libc_start_main in
+- static executable. */
+- init_cpu_features (&GLRO(dl_larch_cpu_features));
+-#endif
+-}
+-
+-
+ /* Return nonzero iff ELF header is compatible with the running host. */
+ static inline int
+ elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
+@@ -309,9 +290,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope,
+ if (profile != 0)
+ {
+ #if !defined __loongarch_soft_float
+- if (RTLD_SUPPORT_LASX)
++ if (SUPPORT_LASX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lasx;
+- else if (RTLD_SUPPORT_LSX)
++ else if (SUPPORT_LSX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_profile_lsx;
+ else
+ #endif
+@@ -329,9 +310,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope,
+ indicated by the offset on the stack, and then jump to
+ the resolved address. */
+ #if !defined __loongarch_soft_float
+- if (RTLD_SUPPORT_LASX)
++ if (SUPPORT_LASX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
+- else if (RTLD_SUPPORT_LSX)
++ else if (SUPPORT_LSX)
+ gotplt0 = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
+ else
+ #endif
+diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
+deleted file mode 100644
+index 66b34275..00000000
+--- a/sysdeps/loongarch/dl-tunables.list
++++ /dev/null
+@@ -1,25 +0,0 @@
+-# LoongArch specific tunables.
+-# Copyright (C) 2023 Free Software Foundation, Inc.
+-# This file is part of the GNU C Library.
+-
+-# The GNU C Library is free software; you can redistribute it and/or
+-# modify it under the terms of the GNU Lesser General Public
+-# License as published by the Free Software Foundation; either
+-# version 2.1 of the License, or (at your option) any later version.
+-
+-# The GNU C Library is distributed in the hope that it will be useful,
+-# but WITHOUT ANY WARRANTY; without even the implied warranty of
+-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+-# Lesser General Public License for more details.
+-
+-# You should have received a copy of the GNU Lesser General Public
+-# License along with the GNU C Library; if not, see
+-# <http://www.gnu.org/licenses/>.
+-
+-glibc {
+- cpu {
+- hwcaps {
+- type: STRING
+- }
+- }
+-}
+diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
+deleted file mode 100644
+index 1290c4ce..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
++++ /dev/null
+@@ -1,29 +0,0 @@
+-/* Initialize CPU feature data. LoongArch64 version.
+- This file is part of the GNU C Library.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#include <cpu-features.h>
+-#include <elf/dl-hwcaps.h>
+-#include <elf/dl-tunables.h>
+-extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
+-
+-static inline void
+-init_cpu_features (struct cpu_features *cpu_features)
+-{
+- GLRO (dl_larch_cpu_features).hwcap = GLRO (dl_hwcap);
+- TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
+-}
+diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+index 450963ce..d1a280a5 100644
+--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
+@@ -19,23 +19,13 @@
+ #ifndef _CPU_FEATURES_LOONGARCH64_H
+ #define _CPU_FEATURES_LOONGARCH64_H
+
+-#include <stdint.h>
+ #include <sys/auxv.h>
+
+-struct cpu_features
+- {
+- uint64_t hwcap;
+- };
++#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
++#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
++#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+
+-/* Get a pointer to the CPU features structure. */
+-extern const struct cpu_features *_dl_larch_get_cpu_features (void)
+- __attribute__ ((pure));
+-
+-#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
+-#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
+-#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
+-#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
+-#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
+ #define INIT_ARCH()
+
+ #endif /* _CPU_FEATURES_LOONGARCH64_H */
++
+diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
+deleted file mode 100644
+index 6217fda9..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
++++ /dev/null
+@@ -1,60 +0,0 @@
+-/* Data for LoongArch64 version of processor capability information.
+- Linux version.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-/* If anything should be added here check whether the size of each string
+- is still ok with the given array size.
+-
+- All the #ifdefs in the definitions are quite irritating but
+- necessary if we want to avoid duplicating the information. There
+- are three different modes:
+-
+- - PROCINFO_DECL is defined. This means we are only interested in
+- declarations.
+-
+- - PROCINFO_DECL is not defined:
+-
+- + if SHARED is defined the file is included in an array
+- initializer. The .element = { ... } syntax is needed.
+-
+- + if SHARED is not defined a normal array initialization is
+- needed.
+- */
+-
+-#ifndef PROCINFO_CLASS
+-# define PROCINFO_CLASS
+-#endif
+-
+-#if !IS_IN (ldconfig)
+-# if !defined PROCINFO_DECL && defined SHARED
+- ._dl_larch_cpu_features
+-# else
+-PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
+-# endif
+-# ifndef PROCINFO_DECL
+-= { }
+-# endif
+-# if !defined SHARED || defined PROCINFO_DECL
+-;
+-# else
+-,
+-# endif
+-#endif
+-
+-#undef PROCINFO_DECL
+-#undef PROCINFO_CLASS
+diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
+deleted file mode 100644
+index 455fd71a..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
++++ /dev/null
+@@ -1,21 +0,0 @@
+-/* Operating system support for run-time dynamic linker. LoongArch version.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#include <config.h>
+-#include <sysdeps/loongarch/cpu-tunables.c>
+-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
+diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
+deleted file mode 100644
+index f1346ece..00000000
+--- a/sysdeps/unix/sysv/linux/loongarch/libc-start.c
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* Override csu/libc-start.c on LoongArch64.
+- Copyright (C) 2023 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#ifndef SHARED
+-
+-/* Mark symbols hidden in static PIE for early self relocation to work. */
+-# if BUILD_PIE_DEFAULT
+-# pragma GCC visibility push(hidden)
+-# endif
+-
+-# include <ldsodefs.h>
+-# include <cpu-features.c>
+-
+-extern struct cpu_features _dl_larch_cpu_features;
+-
+-# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
+-
+-#endif
+-#include <csu/libc-start.c>
+--
+2.33.0
+
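Note on the revert above (commentary, not part of the patch): once the tunable-based cpu_features plumbing is gone, both the ifunc selectors and the lazy-binding setup read the LSX/LASX bits straight from GLRO (dl_hwcap), so SUPPORT_LSX/SUPPORT_LASX and the former RTLD_SUPPORT_* macros collapse into one definition. Application code can make the same check through getauxval. A minimal sketch, assuming a LoongArch Linux system; the fallback bit values are the kernel UAPI hwcap bits and are only there so the example compiles elsewhere:

/* Minimal sketch: query the same AT_HWCAP bits that SUPPORT_LSX and
   SUPPORT_LASX test inside the dynamic loader.  The fallback values
   below mirror the LoongArch kernel's uapi/asm/hwcap.h and are only
   used if the system headers do not already provide them.  */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_LSX
# define HWCAP_LOONGARCH_LSX  (1 << 4)
#endif
#ifndef HWCAP_LOONGARCH_LASX
# define HWCAP_LOONGARCH_LASX (1 << 5)
#endif

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);

  printf ("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX) ? "yes" : "no");
  printf ("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
  return 0;
}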
View file
_service:tar_scm:elf-Add-new-LoongArch-reloc-types-101-to-108-into-el.patch
Added
@@ -0,0 +1,39 @@
+From fc60db3cf29ba157d09ba4f4b92e3ab382b0339d Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 9 Aug 2023 19:12:54 +0800
+Subject: [PATCH 04/29] elf: Add new LoongArch reloc types (101 to 108) into
+ elf.h
+
+These reloc types are generated by GNU assembler >= 2.41 for relaxation
+support.
+
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=57a930e3
+Signed-off-by: Xi Ruoyao <xry111@xry111.site>
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ elf/elf.h | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/elf/elf.h b/elf/elf.h
+index 89fc8021..d623bdeb 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -4205,6 +4205,14 @@ enum
+ #define R_LARCH_TLS_GD_HI20 98
+ #define R_LARCH_32_PCREL 99
+ #define R_LARCH_RELAX 100
++#define R_LARCH_DELETE 101
++#define R_LARCH_ALIGN 102
++#define R_LARCH_PCREL20_S2 103
++#define R_LARCH_CFA 104
++#define R_LARCH_ADD6 105
++#define R_LARCH_SUB6 106
++#define R_LARCH_ADD_ULEB128 107
++#define R_LARCH_SUB_ULEB128 108
+
+ /* ARC specific declarations. */
+
+--
+2.33.0
+
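The new constants match the relocation numbers that binutils >= 2.41 emits for linker relaxation. As an illustration only (the helper name below is made up, not a glibc API), a readelf-style tool might name them with a table like this; the numeric values are copied from the hunk above:

#include <stdio.h>

/* Illustrative helper: name the LoongArch relaxation-related
   relocations added by this patch.  */
static const char *
larch_reloc_name (unsigned int r_type)
{
  switch (r_type)
    {
    case 101: return "R_LARCH_DELETE";
    case 102: return "R_LARCH_ALIGN";
    case 103: return "R_LARCH_PCREL20_S2";
    case 104: return "R_LARCH_CFA";
    case 105: return "R_LARCH_ADD6";
    case 106: return "R_LARCH_SUB6";
    case 107: return "R_LARCH_ADD_ULEB128";
    case 108: return "R_LARCH_SUB_ULEB128";
    default:  return "R_LARCH_<other>";
    }
}

int
main (void)
{
  for (unsigned int t = 101; t <= 108; t++)
    printf ("%3u  %s\n", t, larch_reloc_name (t));
  return 0;
}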
View file
_service:tar_scm:linux-Sync-Linux-6.6-elf.h.patch
Added
@@ -0,0 +1,48 @@
+From 6b3d687470b8f91bc6eb87e924fe97d4592b3aa5 Mon Sep 17 00:00:00 2001
+From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date: Tue, 31 Oct 2023 13:32:38 -0300
+Subject: [PATCH 29/29] linux: Sync Linux 6.6 elf.h
+
+It adds NT_X86_SHSTK (2fab02b25ae7cf5), NT_RISCV_CSR/NT_RISCV_VECTOR
+(9300f00439743c4), and NT_LOONGARCH_HW_BREAK/NT_LOONGARCH_HW_WATCH
+(1a69f7a161a78ae).
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ elf/elf.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/elf/elf.h b/elf/elf.h
+index 9c51073f..51633079 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -794,6 +794,7 @@ typedef struct
+ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+ #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
++#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+ #define NT_S390_TIMER 0x301 /* s390 timer register */
+ #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+@@ -832,6 +833,8 @@ typedef struct
+ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */
+ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */
+ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */
++#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
++#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */
+ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and
+ status registers. */
+@@ -841,6 +844,8 @@ typedef struct
+ SIMD Extension registers. */
+ #define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary
+ Translation registers. */
++#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
++#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+ /* Legal values for the note segment descriptor types for object files. */
+
+--
+2.33.0
+
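These note types mainly matter to tools that parse core-file notes or use PTRACE_GETREGSET. A hedged compatibility sketch, not part of the patch: the guards below let such a tool build against an elf.h that predates this sync, with the numeric values copied from the hunk above (Linux 6.6); the demo program is illustrative only.

#include <elf.h>
#include <stdio.h>

/* Fallbacks for headers that predate this sync; values as in Linux 6.6.  */
#ifndef NT_X86_SHSTK
# define NT_X86_SHSTK 0x204
#endif
#ifndef NT_RISCV_CSR
# define NT_RISCV_CSR 0x900
#endif
#ifndef NT_RISCV_VECTOR
# define NT_RISCV_VECTOR 0x901
#endif
#ifndef NT_LOONGARCH_HW_BREAK
# define NT_LOONGARCH_HW_BREAK 0xa05
#endif
#ifndef NT_LOONGARCH_HW_WATCH
# define NT_LOONGARCH_HW_WATCH 0xa06
#endif

int
main (void)
{
  printf ("NT_X86_SHSTK          = 0x%x\n", NT_X86_SHSTK);
  printf ("NT_RISCV_CSR          = 0x%x\n", NT_RISCV_CSR);
  printf ("NT_RISCV_VECTOR       = 0x%x\n", NT_RISCV_VECTOR);
  printf ("NT_LOONGARCH_HW_BREAK = 0x%x\n", NT_LOONGARCH_HW_BREAK);
  printf ("NT_LOONGARCH_HW_WATCH = 0x%x\n", NT_LOONGARCH_HW_WATCH);
  return 0;
}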