Projects
home:lf14:branches:Mega:23.09
glibc
_service:tar_scm:LoongArch-Add-ifunc-support-fo...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:LoongArch-Add-ifunc-support-for-rawmemchr-aligned-ls.patch of Package glibc
From b412bcb2cf4914a664bcd24924d670a2e37394b3 Mon Sep 17 00:00:00 2001 From: dengjianbo <dengjianbo@loongson.cn> Date: Mon, 28 Aug 2023 10:08:35 +0800 Subject: [PATCH 14/29] LoongArch: Add ifunc support for rawmemchr{aligned, lsx, lasx} According to glibc rawmemchr microbenchmark, A few cases tested with char '\0' experience performance degradation due to the lasx and lsx versions don't handle the '\0' separately. Overall, rawmemchr-lasx implementation could reduce the runtime about 40%-80%, rawmemchr-lsx implementation could reduce the runtime about 40%-66%, rawmemchr-aligned implementation could reduce the runtime about 20%-40%. Signed-off-by: Peng Fan <fanpeng@loongson.cn> Signed-off-by: ticat_fp <fanpeng@loongson.cn> --- sysdeps/loongarch/lp64/multiarch/Makefile | 3 + .../lp64/multiarch/ifunc-impl-list.c | 8 ++ .../lp64/multiarch/ifunc-rawmemchr.h | 40 ++++++ .../lp64/multiarch/rawmemchr-aligned.S | 124 ++++++++++++++++++ .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++ .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++ sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++ 7 files changed, 365 insertions(+) create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index 5d7ae7ae..64416b02 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -21,5 +21,8 @@ sysdep_routines += \ memmove-unaligned \ memmove-lsx \ memmove-lasx \ + rawmemchr-aligned \ + rawmemchr-lsx \ + rawmemchr-lasx \ # sysdep_routines endif diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c index c8ba87bd..3db9af14 100644 --- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c @@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ) + IFUNC_IMPL (i, name, rawmemchr, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) + IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) +#endif + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) + ) + return i; } diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h new file mode 100644 index 00000000..a7bb4cf9 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h @@ -0,0 +1,40 @@ +/* Common definition for rawmemchr ifunc selections. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <ldsodefs.h> +#include <ifunc-init.h> + +#if !defined __loongarch_soft_float +extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; +#endif +extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ +#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + return OPTIMIZE (lasx); + else if (SUPPORT_LSX) + return OPTIMIZE (lsx); + else +#endif + return OPTIMIZE (aligned); +} diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S new file mode 100644 index 00000000..9c7155ae --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S @@ -0,0 +1,124 @@ +/* Optimized rawmemchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/regdef.h> +#include <sys/asm.h> + +#if IS_IN (libc) +# define RAWMEMCHR_NAME __rawmemchr_aligned +#else +# define RAWMEMCHR_NAME __rawmemchr +#endif + +LEAF(RAWMEMCHR_NAME, 6) + andi t1, a0, 0x7 + bstrins.d a0, zero, 2, 0 + lu12i.w a2, 0x01010 + bstrins.d a1, a1, 15, 8 + + ld.d t0, a0, 0 + slli.d t1, t1, 3 + ori a2, a2, 0x101 + bstrins.d a1, a1, 31, 16 + + li.w t8, -1 + bstrins.d a1, a1, 63, 32 + bstrins.d a2, a2, 63, 32 + sll.d t2, t8, t1 + + sll.d t3, a1, t1 + orn t0, t0, t2 + slli.d a3, a2, 7 + beqz a1, L(find_zero) + + xor t0, t0, t3 + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + + bnez t3, L(count_pos) + addi.d a0, a0, 8 + +L(loop): + ld.d t0, a0, 0 + xor t0, t0, a1 + + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + xor t0, t0, a1 + sub.d t1, t0, a2 + + andn t2, a3, t0 + and t3, t1, t2 + beqz t3, L(loop) + addi.d a0, a0, -8 +L(count_pos): + ctz.d t0, t3 + srli.d t0, t0, 3 + add.d a0, a0, t0 + jr ra + +L(loop_7bit): + ld.d t0, a0, 0 +L(find_zero): + sub.d t1, t0, a2 + and t2, t1, a3 + bnez t2, L(more_check) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + sub.d t1, t0, a2 + and t2, t1, a3 + + beqz t2, L(loop_7bit) + addi.d a0, a0, -8 + +L(more_check): + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + addi.d a0, a0, 8 + +L(loop_8bit): + ld.d t0, a0, 0 + + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + sub.d t1, t0, a2 + + andn t2, a3, t0 + and t3, t1, t2 + beqz t3, L(loop_8bit) + + addi.d a0, a0, -8 + b L(count_pos) + +END(RAWMEMCHR_NAME) + +libc_hidden_builtin_def (__rawmemchr) diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S new file mode 100644 index 00000000..be2eb59d --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S @@ -0,0 +1,82 @@ +/* Optimized rawmemchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/asm.h> +#include <sys/regdef.h> + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define RAWMEMCHR __rawmemchr_lasx + +LEAF(RAWMEMCHR, 6) + move a2, a0 + bstrins.d a0, zero, 5, 0 + xvld xr0, a0, 0 + xvld xr1, a0, 32 + + xvreplgr2vr.b xr2, a1 + xvseq.b xr0, xr0, xr2 + xvseq.b xr1, xr1, xr2 + xvmsknz.b xr0, xr0 + + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + vilvl.h vr0, vr3, vr0 + + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + sra.d t0, t0, a2 + + + beqz t0, L(loop) + ctz.d t0, t0 + add.d a0, a2, t0 + jr ra + +L(loop): + xvld xr0, a0, 64 + xvld xr1, a0, 96 + addi.d a0, a0, 64 + xvseq.b xr0, xr0, xr2 + + xvseq.b xr1, xr1, xr2 + xvmax.bu xr3, xr0, xr1 + xvseteqz.v fcc0, xr3 + bcnez fcc0, L(loop) + + xvmsknz.b xr0, xr0 + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + + + vilvl.h vr0, vr3, vr0 + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + + ctz.d t0, t0 + add.d a0, a0, t0 + jr ra +END(RAWMEMCHR) + +libc_hidden_builtin_def (RAWMEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S new file mode 100644 index 00000000..2f6fe024 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S @@ -0,0 +1,71 @@ +/* Optimized rawmemchr implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/regdef.h> +#include <sys/asm.h> + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define RAWMEMCHR __rawmemchr_lsx + +LEAF(RAWMEMCHR, 6) + move a2, a0 + bstrins.d a0, zero, 4, 0 + vld vr0, a0, 0 + vld vr1, a0, 16 + + vreplgr2vr.b vr2, a1 + vseq.b vr0, vr0, vr2 + vseq.b vr1, vr1, vr2 + vmsknz.b vr0, vr0 + + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + sra.w t0, t0, a2 + + beqz t0, L(loop) + ctz.w t0, t0 + add.d a0, a2, t0 + jr ra + + +L(loop): + vld vr0, a0, 32 + vld vr1, a0, 48 + addi.d a0, a0, 32 + vseq.b vr0, vr0, vr2 + + vseq.b vr1, vr1, vr2 + vmax.bu vr3, vr0, vr1 + vseteqz.v fcc0, vr3 + bcnez fcc0, L(loop) + + vmsknz.b vr0, vr0 + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + + ctz.w t0, t0 + add.d a0, a0, t0 + jr ra +END(RAWMEMCHR) + +libc_hidden_builtin_def (RAWMEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c new file mode 100644 index 00000000..89c7ffff --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of rawmemchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define rawmemchr __redirect_rawmemchr +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# undef rawmemchr +# undef __rawmemchr + +# define SYMBOL_NAME rawmemchr +# include "ifunc-rawmemchr.h" + +libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, + IFUNC_SELECTOR ()); +weak_alias (__rawmemchr, rawmemchr) +# ifdef SHARED +__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) + __attribute__((visibility ("hidden"))); +# endif +#endif -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2