Projects
openEuler:24.03
gcc
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file,
click here
.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 16
View file
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 22 +%global gcc_release 24 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -23,7 +23,7 @@ %else %global build_libquadmath 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64 %global build_libasan 1 %else %global build_libasan 0 @@ -38,7 +38,7 @@ %else %global build_liblsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64 %global build_libubsan 1 %else %global build_libubsan 0 @@ -321,6 +321,10 @@ Patch3124: libsanitizer-add-LoongArch-support.patch Patch3125: LoongArch-fix-error-building.patch Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch +Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch +Patch3128: LoongArch-Add-LA664-support.patch +Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch +Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch %endif # On ARM EABI systems, we do want -gnueabi to be part of the @@ -970,6 +974,10 @@ %patch3124 -p1 %patch3125 -p1 %patch3126 -p1 +%patch3127 -p1 +%patch3128 -p1 +%patch3129 -p1 +%patch3130 -p1 %endif echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2416,6 +2424,8 @@ %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h 
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h %endif %ifarch sparc sparcv9 sparc64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/visintrin.h @@ -3228,6 +3238,13 @@ %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Apr 23 2024 laokz <zhangkai@iscas.ac.cn> - 12.3.1-24 +- Type: SPEC +- DESC: riscv64 enable libasan, libubsan package + +* Mon Apr 22 2024 Peng Fan <fanpeng@loongson.cn> - 12.3.1-23 +- DESC: Add LoongArch 3A6000 support + * Fri Apr 12 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-22 - Type: Sync - DESC: Sync patch from openeuler/gcc
View file
_service:tar_scm:LoongArch-Add-LA664-support.patch
Added
@@ -0,0 +1,332 @@ +From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 16 Nov 2023 20:43:53 +0800 +Subject: PATCH LoongArch: Add LA664 support. + +Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1. +Support the configure setting --with-arch =la664, and support -march=la664,-mtune=la664. + +gcc/ChangeLog: + + * config.gcc: Support LA664. + * config/loongarch/genopts/loongarch-strings: Likewise. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.c: Likewise. + * config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise. + (ISA_BASE_LA64V110): Define macro. + (N_ARCH_TYPES): Update value. + (N_TUNE_TYPES): Update value. + (CPU_LA664): New macro. + * config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise. + (isa_base_compat_p): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled + when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110. + (TARGET_uARCH_LA664): Define macro. + * config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): + Add LA664 support. + * config/loongarch/loongarch.opt: Regenerate. 
+ +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + gcc/config.gcc | 10 ++++----- + .../loongarch/genopts/loongarch-strings | 1 + + gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++ + gcc/config/loongarch/loongarch-cpu.cc | 4 ++++ + gcc/config/loongarch/loongarch-def.c | 21 +++++++++++++++++++ + gcc/config/loongarch/loongarch-def.h | 8 ++++--- + gcc/config/loongarch/loongarch-opts.cc | 8 +++---- + gcc/config/loongarch/loongarch-opts.h | 4 +++- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.opt | 3 +++ + 11 files changed, 51 insertions(+), 13 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 6d51bd93f3f..b88591b6fd8 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5039,7 +5039,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- "" | abi-default | loongarch64 | la464) ;; # OK, append here. ++ "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 1>&2 +@@ -5088,7 +5088,7 @@ case "${target}" in + case ${abi_base}/${abi_ext} in + lp64*/base) + # architectures that support lp64* ABI +- arch_pattern="native|abi-default|loongarch64|la464" ++ arch_pattern="native|abi-default|loongarch64|la[46]64" + # default architecture for lp64* ABI + arch_default="abi-default" + ;; +@@ -5163,7 +5163,7 @@ case "${target}" in + # Check default with_tune configuration using with_arch. + case ${with_arch} in + loongarch64) +- tune_pattern="native|abi-default|loongarch64|la464" ++ tune_pattern="native|abi-default|loongarch64|la[46]64" + ;; + *) + # By default, $with_tune == $with_arch +@@ -5219,7 +5219,7 @@ case "${target}" in + # Fixed: use the default gcc configuration for all multilib + # builds by default. + with_multilib_default="" ;; +- arch,native|arch,loongarch64|arch,la464) # OK, append here.
++ arch,native|arch,loongarch64|arch,la[46]64) # OK, append here. + with_multilib_default="/march=${component}" ;; + arch,*) + with_multilib_default="/march=abi-default" +@@ -5307,7 +5307,7 @@ case "${target}" in + if test x${parse_state} = x"arch"; then + # -march option + case ${component} in +- native | abi-default | loongarch64 | la464) # OK, append here. ++ native | abi-default | loongarch64 | la[46]64) # OK, append here. + # Append -march spec for each multilib variant. + loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" + parse_state="opts" +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 8e412f7536e..7bc4824007e 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -26,6 +26,7 @@ STR_CPU_NATIVE native + STR_CPU_ABI_DEFAULT abi-default + STR_CPU_LOONGARCH64 loongarch64 + STR_CPU_LA464 la464 ++STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 158701d327a..00b4733d75b 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) + EnumValue + Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) + ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++ + m@@OPTSTR_ARCH@@= + Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA.
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7a2866f60f9..f3a13414143 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt) + native_cpu_type = CPU_LA464; + break; + ++ case 0x0014d000: /* LA664 */ ++ native_cpu_type = CPU_LA664; ++ break; ++ + default: + /* Unknown PRID. */ + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +index 430ef8b2d95..067629141b6 100644 +--- a/gcc/config/loongarch/loongarch-def.c ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -28,6 +28,7 @@ loongarch_cpu_stringsN_TUNE_TYPES = { + CPU_ABI_DEFAULT = STR_CPU_ABI_DEFAULT, + CPU_LOONGARCH64 = STR_CPU_LOONGARCH64, + CPU_LA464 = STR_CPU_LA464, ++ CPU_LA664 = STR_CPU_LA664, + }; + + struct loongarch_isa +@@ -42,6 +43,11 @@ loongarch_cpu_default_isaN_ARCH_TYPES = { + .fpu = ISA_EXT_FPU64, + .simd = ISA_EXT_SIMD_LASX, + }, ++ CPU_LA664 = { ++ .base = ISA_BASE_LA64V110, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, + }; + + struct loongarch_cache +@@ -58,6 +64,12 @@ loongarch_cpu_cacheN_TUNE_TYPES = { + .l2d_size = 256, + .simultaneous_prefetches = 4, + }, ++ CPU_LA664 = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, + }; + + struct loongarch_align +@@ -70,6 +82,10 @@ loongarch_cpu_alignN_TUNE_TYPES = { + .function = "32", + .label = "16", + }, ++ CPU_LA664 = { ++ .function = "32", ++ .label = "16", ++ }, + }; + + +@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_dataN_TUNE_TYPES = { + CPU_LA464 = { + DEFAULT_COSTS + }, ++ CPU_LA664 = { ++ DEFAULT_COSTS ++ }, + }; + + /* RTX costs to use when optimizing for size. */ +@@ -127,6 +146,7 @@ loongarch_cpu_issue_rateN_TUNE_TYPES = { + CPU_NATIVE = 4, + CPU_LOONGARCH64 = 4, + CPU_LA464 = 4, ++ CPU_LA664 = 6, + };
View file
_service:tar_scm:LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
Added
@@ -0,0 +1,106 @@ +From 56752a6bbfb3d3501d0899b23020c3e2eb58882c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 17 Nov 2023 20:44:17 +0800 +Subject: [PATCH] LoongArch: Fix internal error running "gcc -march=native" on + LA664 + +On LA664, the PRID preset is ISA_BASE_LA64V110 but the base architecture +is guessed ISA_BASE_LA64V100. This causes a warning to be output: + + cc1: warning: base architecture 'la64' differs from PRID preset '?' + +But we've not set the "?" above in loongarch_isa_base_strings, thus it's +a nullptr and then an ICE is triggered. + +Add ISA_BASE_LA64V110 to genopts and initialize +loongarch_isa_base_strings[ISA_BASE_LA64V110] correctly to fix the ICE. +The warning itself will be fixed later. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: + (STR_ISA_BASE_LA64V110): Add. + * config/loongarch/genopts/loongarch.opt.in: + (ISA_BASE_LA64V110): Add. + * config/loongarch/loongarch-def.c + (loongarch_isa_base_strings): Initialize ISA_BASE_LA64V110 + to STR_ISA_BASE_LA64V110. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-str.h: Regenerate.
+ +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + gcc/config/loongarch/genopts/loongarch-strings | 1 + + gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++ + gcc/config/loongarch/loongarch-def.c | 1 + + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.opt | 3 +++ + 5 files changed, 9 insertions(+) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 7bc4824007e..b2070c83ed0 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,6 +30,7 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 00b4733d75b..b274b3fb21e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,6 +32,9 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + ++EnumValue ++Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) ++ + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +index 067629141b6..f22d488acb2 100644 +--- a/gcc/config/loongarch/loongarch-def.c ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -165,6 +165,7 @@ loongarch_cpu_multipass_dfa_lookaheadN_TUNE_TYPES = { + const char* + loongarch_isa_base_stringsN_ISA_BASE_TYPES = { + ISA_BASE_LA64V100 = STR_ISA_BASE_LA64V100, ++ ISA_BASE_LA64V110 = STR_ISA_BASE_LA64V110, + }; + + const char* +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index fc4f41bfc1e..114dbc692d7 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -33,6 
+33,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA664 "la664" + + #define STR_ISA_BASE_LA64V100 "la64" ++#define STR_ISA_BASE_LA64V110 "la64v1.1" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 7f129e53ba5..350ca30d232 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -39,6 +39,9 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) + ++EnumValue ++Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110) ++ + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +-- +2.33.0 +
View file
_service:tar_scm:LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Added
@@ -0,0 +1,907 @@ +From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 29 Nov 2023 11:16:59 +0800 +Subject: PATCH LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on + LA664 PR112611 + +For xvshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. + +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. + +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 343 ++++++------------ + .../loongarch/vector/lsx/lsx-vshuf.c | 162 +++------ + 2 files changed, 164 insertions(+), 341 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index d8a29dbd225..b8ab387118a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -43,9 +43,9 @@ main () + *((unsigned long *)&__m256i_op11) = 0xfffffefefffffefe; + *((unsigned long *)&__m256i_op10) = 0xfffffefefffffefe; + *((unsigned long *)&__m256i_op23) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op22) = 0xfffffff8fffffff8; ++ *((unsigned long *)&__m256i_op22) = 0x3f3f3f383f3f3f38; + *((unsigned long *)&__m256i_op21) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op20) = 0xfffffff8fc000000; ++ *((unsigned long *)&__m256i_op20) = 0x3f3f3f383c000000; + *((unsigned long *)&__m256i_result3) = 0xfafafafafafafafa; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; + *((unsigned long *)&__m256i_result1) = 0xfefefefefefefefe; +@@ -137,33 
+137,14 @@ main () + *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op11) = 0x0000000000000000; + *((unsigned long *)&__m256i_op10) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op23) = 0x0000ffffffffffff; +- *((unsigned long *)&__m256i_op22) = 0x0000ffff0000ffff; +- *((unsigned long *)&__m256i_op21) = 0x0000ffffffffffff; +- *((unsigned long *)&__m256i_op20) = 0x0000ffff0000ffff; ++ *((unsigned long *)&__m256i_op23) = 0x0000111111111111; ++ *((unsigned long *)&__m256i_op22) = 0x0000222200002222; ++ *((unsigned long *)&__m256i_op21) = 0x0000111111111111; ++ *((unsigned long *)&__m256i_op20) = 0x0000222200002222; + *((unsigned long *)&__m256i_result3) = 0xffff000000000000; +- *((unsigned long *)&__m256i_result2) = 0xffff0000ffff0000; ++ *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result1) = 0xffff000000000000; +- *((unsigned long *)&__m256i_result0) = 0xffff0000ffff0000; +- __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op03) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op02) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op01) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op00) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op13) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op12) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op11) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op10) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op23) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op22) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op21) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op20) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_result3) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; 
+- *((unsigned long *)&__m256i_result0) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +@@ -176,7 +157,7 @@ main () + *((unsigned long *)&__m256i_op11) = 0x0000000000000000; + *((unsigned long *)&__m256i_op10) = 0x0000000000000000; + *((unsigned long *)&__m256i_op23) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op22) = 0x0000000000077fff; ++ *((unsigned long *)&__m256i_op22) = 0x0000000000032f1f; + *((unsigned long *)&__m256i_op21) = 0x0000000000000000; + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff; +@@ -186,9 +167,9 @@ main () + __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op03) = 0xfffffffffffffefe; +- *((unsigned long *)&__m256i_op02) = 0x0000000000000101; +- *((unsigned long *)&__m256i_op01) = 0xfffffffffffffefe; ++ *((unsigned long *)&__m256i_op03) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op02) = 0x0000000000000001; ++ *((unsigned long *)&__m256i_op01) = 0x0011001100110011; + *((unsigned long *)&__m256i_op00) = 0x0000000000000101; + *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op12) = 0x67eee33567eee435; +@@ -198,35 +179,16 @@ main () + *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op21) = 0x00000000ffffffff; + *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result3) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result0) = 
0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op03) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_op03) = 0x0022002200000000; + *((unsigned long *)&__m256i_op02) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op01) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op00) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op13) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op12) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op11) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op10) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op23) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op22) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op21) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op20) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result3) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result0) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op03) = 0xffffffff80000000; +- *((unsigned long *)&__m256i_op02) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op01) = 0xffffffff80000000; ++ *((unsigned long *)&__m256i_op01) = 0x001f001f00000000; + *((unsigned long *)&__m256i_op00) = 0x0000000000000000; + *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff; +@@ -243,10 +205,10 @@ main () + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff; +- 
*((unsigned long *)&__m256i_op01) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_op03) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op02) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op01) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op00) = 0x0011001100110011; + *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op11) = 0xffffffffffffffff; +@@ -255,17 +217,17 @@ main () + *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result3) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result0) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op01) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_op03) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op02) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op01) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op00) = 0x003f003f003f003f; + *((unsigned long *)&__m256i_op13) = 0xefdfefdf00000000; + *((unsigned long *)&__m256i_op12) = 0xefdfefdfefdfefdf; + *((unsigned long *)&__m256i_op11) = 
0xefdfefdf00000000; +@@ -274,36 +236,17 @@ main () + *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result3) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result0) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op03) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op02) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op01) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op00) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op13) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op12) = 0x0000000000000000;
View file
_service:tar_scm:LoongArch-Use-finer-grained-DBAR-hints.patch
Added
@@ -0,0 +1,137 @@ +From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 14 Nov 2023 05:32:38 +0800 +Subject: [PATCH] LoongArch: Use finer-grained DBAR hints + +LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]: + +- Bit 4: kind of constraint (0: completion, 1: ordering) +- Bit 3: barrier for previous read (0: true, 1: false) +- Bit 2: barrier for previous write (0: true, 1: false) +- Bit 1: barrier for succeeding read (0: true, 1: false) +- Bit 0: barrier for succeeding write (0: true, 1: false) + +LLVM has already utilized them for different memory orders [3]: + +- Bit 4 is always set to one because it's only intended to be zero for + things like MMIO devices, which are out of the scope of memory orders. +- An acquire barrier is used to implement acquire loads like + + ld.d $a1, $t0, 0 + dbar acquire_hint + + where the load operation (ld.d) should not be reordered with any load + or store operation after the acquire load. To accomplish this + constraint, we need to prevent the load operation from being reordered + after the barrier, and also prevent any following load/store operation + from being reordered before the barrier. Thus bits 0, 1, and 3 must + be zero, and bit 2 can be one, so acquire_hint should be 0b10100. +- A release barrier is used to implement release stores like + + dbar release_hint + st.d $a1, $t0, 0 + + where the store operation (st.d) should not be reordered with any load + or store operation before the release store. So we need to prevent + the store operation from being reordered before the barrier, and also + prevent any preceding load/store operation from being reordered after + the barrier. So bits 0, 2, 3 must be zero, and bit 1 can be one. So + release_hint should be 0b10010. + +A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux +kernel [1], and LoongArch LLVM [3]. So the mapping should be correct.
+And I've also bootstrapped & regtested GCC on a LA664 with this patch. + +The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can +unconditionally emit the new hints without a compiler switch. + +[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed +[2]: https://github.com/loongson-community/docs/pull/12 +[3]: https://github.com/llvm/llvm-project/pull/68787 +[4]: https://gcc.gnu.org/r14-406 + +gcc/ChangeLog: + + * config/loongarch/sync.md (mem_thread_fence): Remove redundant + check. + (mem_thread_fence_1): Emit finer-grained DBAR hints for + different memory models, instead of 0. + +Signed-off-by: ticat_fp <fanpeng@loongson.cn> +--- + gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 9924d522bcd..1ad0c63e0d9 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -50,23 +50,56 @@ + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" + { +- if (INTVAL (operands[0]) != MEMMODEL_RELAXED) +- { +- rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); +- MEM_VOLATILE_P (mem) = 1; +- emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); +- } ++ rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (mem) = 1; ++ emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); ++ + DONE; + }) + +-;; Until the LoongArch memory model (hence its mapping from C++) is finalized, +-;; conservatively emit a full FENCE.
++;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++;; the Linux patch revealing it [1]: ++;; ++;; - Bit 4: kind of constraint (0: completion, 1: ordering) ++;; - Bit 3: barrier for previous read (0: true, 1: false) ++;; - Bit 2: barrier for previous write (0: true, 1: false) ++;; - Bit 1: barrier for succeeding read (0: true, 1: false) ++;; - Bit 0: barrier for succeeding write (0: true, 1: false) ++;; ++;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed ++;; ++;; Implementations without support for the finer-granularity hints simply treat ++;; all as the full barrier (DBAR 0), so we can unconditionally start emitting the ++;; more precise hints right away. + (define_insn "mem_thread_fence_1" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (match_operand:SI 1 "const_int_operand" "")] ;; model + "" +- "dbar\t0") ++ { ++ enum memmodel model = memmodel_base (INTVAL (operands[1])); ++ ++ switch (model) ++ { ++ case MEMMODEL_ACQUIRE: ++ return "dbar\t0b10100"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0b10010"; ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0b10000"; ++ default: ++ /* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions ++ need to be issued and this expansion is not invoked." ++ ++ __atomic builtins doc: "Consume is implemented using the ++ stronger acquire memory order because of a deficiency in C++11's ++ semantics." See PR 59448 and get_memmodel in builtins.cc. ++ ++ Other values should not be returned by memmodel_base. */ ++ gcc_unreachable (); ++ } ++ }) + + ;; Atomic memory operations. + +-- +2.33.0 +
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2