开源软件构建与测试

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

Changes of Revision 16

_service:tar_scm:gcc.spec Changed

@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 22
+%global gcc_release 24
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -23,7 +23,7 @@
 %else
 %global build_libquadmath 0
 %endif
-%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64
+%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64
 %global build_libasan 1
 %else
 %global build_libasan 0
@@ -38,7 +38,7 @@
 %else
 %global build_liblsan 0
 %endif
-%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64
+%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64
 %global build_libubsan 1
 %else
 %global build_libubsan 0
@@ -321,6 +321,10 @@
 Patch3124: libsanitizer-add-LoongArch-support.patch
 Patch3125: LoongArch-fix-error-building.patch
 Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch
+Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch
+Patch3128: LoongArch-Add-LA664-support.patch
+Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
+Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
 %endif
 
 # On ARM EABI systems, we do want -gnueabi to be part of the
@@ -970,6 +974,10 @@
 %patch3124 -p1
 %patch3125 -p1
 %patch3126 -p1
+%patch3127 -p1
+%patch3128 -p1
+%patch3129 -p1
+%patch3130 -p1
 %endif
 
 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
@@ -2416,6 +2424,8 @@
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h
 %endif
 %ifarch sparc sparcv9 sparc64
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/visintrin.h
@@ -3228,6 +3238,13 @@
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Tue Apr 23 2024 laokz <zhangkai@iscas.ac.cn> - 12.3.1-24
+- Type: SPEC
+- DESC: riscv64 enable libasan, libubsan package
+
+* Mon Apr 22 2024 Peng Fan <fanpeng@loongson.cn> - 12.3.1-23
+- DESC: Add LoongArch 3A6000 support
+
 * Fri Apr 12 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-22
 - Type: Sync
 - DESC: Sync patch from openeuler/gcc

_service:tar_scm:LoongArch-Add-LA664-support.patch Added

@@ -0,0 +1,332 @@
+From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 16 Nov 2023 20:43:53 +0800
+Subject: [PATCH] LoongArch: Add LA664 support.
+
+Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1.
+Support the configure setting --with-arch=la664, and support -march=la664 and -mtune=la664.
+
+gcc/ChangeLog:
+
+	* config.gcc: Support LA664.
+	* config/loongarch/genopts/loongarch-strings: Likewise.
+	* config/loongarch/genopts/loongarch.opt.in: Likewise.
+	* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise.
+	* config/loongarch/loongarch-def.c: Likewise.
+	* config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise.
+	(ISA_BASE_LA64V110): Define macro.
+	(N_ARCH_TYPES): Update value.
+	(N_TUNE_TYPES): Update value.
+	(CPU_LA664): New macro.
+	* config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise.
+	(isa_base_compat_p): Likewise.
+	* config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled
+	when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110.
+	(TARGET_uARCH_LA664): Define macro.
+	* config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise.
+	* config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width):
+	Add LA664 support.
+	* config/loongarch/loongarch.opt: Regenerate.
+
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config.gcc                                | 10 ++++-----
+ .../loongarch/genopts/loongarch-strings       |  1 +
+ gcc/config/loongarch/genopts/loongarch.opt.in |  3 +++
+ gcc/config/loongarch/loongarch-cpu.cc         |  4 ++++
+ gcc/config/loongarch/loongarch-def.c          | 21 +++++++++++++++++++
+ gcc/config/loongarch/loongarch-def.h          |  8 ++++---
+ gcc/config/loongarch/loongarch-opts.cc        |  8 +++----
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++-
+ gcc/config/loongarch/loongarch-str.h          |  1 +
+ gcc/config/loongarch/loongarch.cc             |  1 +
+ gcc/config/loongarch/loongarch.opt            |  3 +++
+ 11 files changed, 51 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 6d51bd93f3f..b88591b6fd8 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -5039,7 +5039,7 @@ case "${target}" in
+ 
+ 		# Perform initial sanity checks on --with-* options.
+ 		case ${with_arch} in
+-		"" | abi-default | loongarch64 | la464) ;; # OK, append here.
++		"" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
+ 		native)
+ 			if test x${host} != x${target}; then
+ 				echo "--with-arch=native is illegal for cross-compiler." 1>&2
+@@ -5088,7 +5088,7 @@ case "${target}" in
+ 		case ${abi_base}/${abi_ext} in
+ 		lp64*/base)
+ 			# architectures that support lp64* ABI
+-			arch_pattern="native|abi-default|loongarch64|la464"
++			arch_pattern="native|abi-default|loongarch64|la[46]64"
+ 			# default architecture for lp64* ABI
+ 			arch_default="abi-default"
+ 			;;
+@@ -5163,7 +5163,7 @@ case "${target}" in
+ 		# Check default with_tune configuration using with_arch.
+ 		case ${with_arch} in
+ 		loongarch64)
+-			tune_pattern="native|abi-default|loongarch64|la464"
++			tune_pattern="native|abi-default|loongarch64|la[46]64"
+ 			;;
+ 		*)
+ 			# By default, $with_tune == $with_arch
+@@ -5219,7 +5219,7 @@ case "${target}" in
+ 					# Fixed: use the default gcc configuration for all multilib
+ 					# builds by default.
+ 					with_multilib_default="" ;;
+-				arch,native|arch,loongarch64|arch,la464) # OK, append here.
++				arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
+ 					with_multilib_default="/march=${component}" ;;
+ 				arch,*)
+ 					with_multilib_default="/march=abi-default"
+@@ -5307,7 +5307,7 @@ case "${target}" in
+ 				if test x${parse_state} = x"arch"; then
+ 					# -march option
+ 					case ${component} in
+-					native | abi-default | loongarch64 | la464) # OK, append here.
++					native | abi-default | loongarch64 | la[46]64) # OK, append here.
+ 						# Append -march spec for each multilib variant.
+ 						loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
+ 						parse_state="opts"
+diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
+index 8e412f7536e..7bc4824007e 100644
+--- a/gcc/config/loongarch/genopts/loongarch-strings
++++ b/gcc/config/loongarch/genopts/loongarch-strings
+@@ -26,6 +26,7 @@ STR_CPU_NATIVE	      native
+ STR_CPU_ABI_DEFAULT   abi-default
+ STR_CPU_LOONGARCH64   loongarch64
+ STR_CPU_LA464	      la464
++STR_CPU_LA664	      la664
+ 
+ # Base architecture
+ STR_ISA_BASE_LA64V100 la64
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index 158701d327a..00b4733d75b 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64)
+ EnumValue
+ Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
+ 
++EnumValue
++Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
++
+ m@@OPTSTR_ARCH@@=
+ Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
+ -m@@OPTSTR_ARCH@@=PROCESSOR	Generate code for the given PROCESSOR ISA.
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
+index 7a2866f60f9..f3a13414143 100644
+--- a/gcc/config/loongarch/loongarch-cpu.cc
++++ b/gcc/config/loongarch/loongarch-cpu.cc
+@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+       native_cpu_type = CPU_LA464;
+       break;
+ 
++    case 0x0014d000:   /* LA664 */
++      native_cpu_type = CPU_LA664;
++      break;
++
+     default:
+       /* Unknown PRID.  */
+       if (tune_native_p)
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+index 430ef8b2d95..067629141b6 100644
+--- a/gcc/config/loongarch/loongarch-def.c
++++ b/gcc/config/loongarch/loongarch-def.c
+@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = {
+   [CPU_ABI_DEFAULT]	  = STR_CPU_ABI_DEFAULT,
+   [CPU_LOONGARCH64]	  = STR_CPU_LOONGARCH64,
+   [CPU_LA464]		  = STR_CPU_LA464,
++  [CPU_LA664]		  = STR_CPU_LA664,
+ };
+ 
+ struct loongarch_isa
+@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
+       .fpu = ISA_EXT_FPU64,
+       .simd = ISA_EXT_SIMD_LASX,
+   },
++  [CPU_LA664] = {
++      .base = ISA_BASE_LA64V110,
++      .fpu = ISA_EXT_FPU64,
++      .simd = ISA_EXT_SIMD_LASX,
++  },
+ };
+ 
+ struct loongarch_cache
+@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
+       .l2d_size = 256,
+       .simultaneous_prefetches = 4,
+   },
++  [CPU_LA664] = {
++      .l1d_line_size = 64,
++      .l1d_size = 64,
++      .l2d_size = 256,
++      .simultaneous_prefetches = 4,
++  },
+ };
+ 
+ struct loongarch_align
+@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
+     .function = "32",
+     .label = "16",
+   },
++  [CPU_LA664] = {
++    .function = "32",
++    .label = "16",
++  },
+ };
+ 
+ 
+@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
+   [CPU_LA464] = {
+       DEFAULT_COSTS
+   },
++  [CPU_LA664] = {
++      DEFAULT_COSTS
++  },
+ };
+ 
+ /* RTX costs to use when optimizing for size.  */
+@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
+   [CPU_NATIVE]	      = 4,
+   [CPU_LOONGARCH64]   = 4,
+   [CPU_LA464]	      = 4,
++  [CPU_LA664]	      = 6,
+ };

_service:tar_scm:LoongArch-Fix-internal-error-running-gcc-march-nativ.patch Added

@@ -0,0 +1,106 @@
+From 56752a6bbfb3d3501d0899b23020c3e2eb58882c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 17 Nov 2023 20:44:17 +0800
+Subject: [PATCH] LoongArch: Fix internal error running "gcc -march=native" on
+ LA664
+
+On LA664, the PRID preset is ISA_BASE_LA64V110 but the base architecture
+is guessed as ISA_BASE_LA64V100.  This causes a warning to be output:
+
+    cc1: warning: base architecture 'la64' differs from PRID preset '?'
+
+But we've not set the "?" above in loongarch_isa_base_strings, thus it's
+a nullptr and then an ICE is triggered.
+
+Add ISA_BASE_LA64V110 to genopts and initialize
+loongarch_isa_base_strings[ISA_BASE_LA64V110] correctly to fix the ICE.
+The warning itself will be fixed later.
+
+gcc/ChangeLog:
+
+	* config/loongarch/genopts/loongarch-strings:
+	(STR_ISA_BASE_LA64V110): Add.
+	* config/loongarch/genopts/loongarch.opt.in:
+	(ISA_BASE_LA64V110): Add.
+	* config/loongarch/loongarch-def.c
+	(loongarch_isa_base_strings): Initialize ISA_BASE_LA64V110
+	to STR_ISA_BASE_LA64V110.
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch-str.h: Regenerate.
+
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/genopts/loongarch-strings | 1 +
+ gcc/config/loongarch/genopts/loongarch.opt.in  | 3 +++
+ gcc/config/loongarch/loongarch-def.c           | 1 +
+ gcc/config/loongarch/loongarch-str.h           | 1 +
+ gcc/config/loongarch/loongarch.opt             | 3 +++
+ 5 files changed, 9 insertions(+)
+
+diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
+index 7bc4824007e..b2070c83ed0 100644
+--- a/gcc/config/loongarch/genopts/loongarch-strings
++++ b/gcc/config/loongarch/genopts/loongarch-strings
+@@ -30,6 +30,7 @@ STR_CPU_LA664	      la664
+ 
+ # Base architecture
+ STR_ISA_BASE_LA64V100 la64
++STR_ISA_BASE_LA64V110 la64v1.1
+ 
+ # -mfpu
+ OPTSTR_ISA_EXT_FPU    fpu
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index 00b4733d75b..b274b3fb21e 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -32,6 +32,9 @@ Basic ISAs of LoongArch:
+ EnumValue
+ Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100)
+ 
++EnumValue
++Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110)
++
+ ;; ISA extensions / adjustments
+ Enum
+ Name(isa_ext_fpu) Type(int)
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+index 067629141b6..f22d488acb2 100644
+--- a/gcc/config/loongarch/loongarch-def.c
++++ b/gcc/config/loongarch/loongarch-def.c
+@@ -165,6 +165,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
+ const char*
+ loongarch_isa_base_strings[N_ISA_BASE_TYPES] = {
+   [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100,
++  [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110,
+ };
+ 
+ const char*
+diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
+index fc4f41bfc1e..114dbc692d7 100644
+--- a/gcc/config/loongarch/loongarch-str.h
++++ b/gcc/config/loongarch/loongarch-str.h
+@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define STR_CPU_LA664 "la664"
+ 
+ #define STR_ISA_BASE_LA64V100 "la64"
++#define STR_ISA_BASE_LA64V110 "la64v1.1"
+ 
+ #define OPTSTR_ISA_EXT_FPU "fpu"
+ #define STR_NONE "none"
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 7f129e53ba5..350ca30d232 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -39,6 +39,9 @@ Basic ISAs of LoongArch:
+ EnumValue
+ Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100)
+ 
++EnumValue
++Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110)
++
+ ;; ISA extensions / adjustments
+ Enum
+ Name(isa_ext_fpu) Type(int)
+-- 
+2.33.0
+

_service:tar_scm:LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch Added

@@ -0,0 +1,907 @@
+From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 29 Nov 2023 11:16:59 +0800
+Subject: [PATCH] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on
+ LA664 [PR112611]
+
+For xvshuf instructions, if the index value in the selector exceeds 63, it triggers
+undefined behavior on LA464, but not on LA664. To ensure compatibility of these two
+tests on both LA464 and LA664, we have modified both tests to ensure that the index
+value in the selector does not exceed 63.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/112611
+	* gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Ensure index is less than 64.
+	* gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
+
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ .../loongarch/vector/lasx/lasx-xvshuf_b.c     | 343 ++++++------------
+ .../loongarch/vector/lsx/lsx-vshuf.c          | 162 +++------
+ 2 files changed, 164 insertions(+), 341 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
+index d8a29dbd225..b8ab387118a 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
+@@ -43,9 +43,9 @@ main ()
+   *((unsigned long *)&__m256i_op11) = 0xfffffefefffffefe;
+   *((unsigned long *)&__m256i_op10) = 0xfffffefefffffefe;
+   *((unsigned long *)&__m256i_op23) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op22) = 0xfffffff8fffffff8;
++  *((unsigned long *)&__m256i_op22) = 0x3f3f3f383f3f3f38;
+   *((unsigned long *)&__m256i_op21) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op20) = 0xfffffff8fc000000;
++  *((unsigned long *)&__m256i_op20) = 0x3f3f3f383c000000;
+   *((unsigned long *)&__m256i_result3) = 0xfafafafafafafafa;
+   *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_result1) = 0xfefefefefefefefe;
+@@ -137,33 +137,14 @@ main ()
+   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_op10) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op23) = 0x0000ffffffffffff;
+-  *((unsigned long *)&__m256i_op22) = 0x0000ffff0000ffff;
+-  *((unsigned long *)&__m256i_op21) = 0x0000ffffffffffff;
+-  *((unsigned long *)&__m256i_op20) = 0x0000ffff0000ffff;
++  *((unsigned long *)&__m256i_op23) = 0x0000111111111111;
++  *((unsigned long *)&__m256i_op22) = 0x0000222200002222;
++  *((unsigned long *)&__m256i_op21) = 0x0000111111111111;
++  *((unsigned long *)&__m256i_op20) = 0x0000222200002222;
+   *((unsigned long *)&__m256i_result3) = 0xffff000000000000;
+-  *((unsigned long *)&__m256i_result2) = 0xffff0000ffff0000;
++  *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_result1) = 0xffff000000000000;
+-  *((unsigned long *)&__m256i_result0) = 0xffff0000ffff0000;
+-  __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
+-  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+-
+-  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op23) = 0x000000000000ffff;
+-  *((unsigned long *)&__m256i_op22) = 0x000000000000ffff;
+-  *((unsigned long *)&__m256i_op21) = 0x000000000000ffff;
+-  *((unsigned long *)&__m256i_op20) = 0x000000000000ffff;
+-  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
++  *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
+   __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ 
+@@ -176,7 +157,7 @@ main ()
+   *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_op23) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op22) = 0x0000000000077fff;
++  *((unsigned long *)&__m256i_op22) = 0x0000000000032f1f;
+   *((unsigned long *)&__m256i_op21) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_op20) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
+@@ -186,9 +167,9 @@ main ()
+   __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ 
+-  *((unsigned long *)&__m256i_op03) = 0xfffffffffffffefe;
+-  *((unsigned long *)&__m256i_op02) = 0x0000000000000101;
+-  *((unsigned long *)&__m256i_op01) = 0xfffffffffffffefe;
++  *((unsigned long *)&__m256i_op03) = 0x0011001100110011;
++  *((unsigned long *)&__m256i_op02) = 0x0000000000000001;
++  *((unsigned long *)&__m256i_op01) = 0x0011001100110011;
+   *((unsigned long *)&__m256i_op00) = 0x0000000000000101;
+   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op12) = 0x67eee33567eee435;
+@@ -198,35 +179,16 @@ main ()
+   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op21) = 0x00000000ffffffff;
+   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
++  *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
++  *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
+   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ 
+-  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
++  *((unsigned long *)&__m256i_op03) = 0x0022002200000000;
+   *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op23) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op22) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op21) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op20) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
+-  __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
+-  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+-
+-  *((unsigned long *)&__m256i_op03) = 0xffffffff80000000;
+-  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op01) = 0xffffffff80000000;
++  *((unsigned long *)&__m256i_op01) = 0x001f001f00000000;
+   *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
+   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
+@@ -243,10 +205,10 @@ main ()
+   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ 
+-  *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op01) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff;
++  *((unsigned long *)&__m256i_op03) = 0x0011001100110011;
++  *((unsigned long *)&__m256i_op02) = 0x0011001100110011;
++  *((unsigned long *)&__m256i_op01) = 0x0011001100110011;
++  *((unsigned long *)&__m256i_op00) = 0x0011001100110011;
+   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op11) = 0xffffffffffffffff;
+@@ -255,17 +217,17 @@ main ()
+   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
++  *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
++  *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
++  *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff;
++  *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
+   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ 
+-  *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op01) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff;
++  *((unsigned long *)&__m256i_op03) = 0x003f003f003f003f;
++  *((unsigned long *)&__m256i_op02) = 0x003f003f003f003f;
++  *((unsigned long *)&__m256i_op01) = 0x003f003f003f003f;
++  *((unsigned long *)&__m256i_op00) = 0x003f003f003f003f;
+   *((unsigned long *)&__m256i_op13) = 0xefdfefdf00000000;
+   *((unsigned long *)&__m256i_op12) = 0xefdfefdfefdfefdf;
+   *((unsigned long *)&__m256i_op11) = 0xefdfefdf00000000;
+@@ -274,36 +236,17 @@ main ()
+   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff;
+   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
+-  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
+-  __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
+-  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+-
+-  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
+-  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;

_service:tar_scm:LoongArch-Use-finer-grained-DBAR-hints.patch Added

@@ -0,0 +1,137 @@
+From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 14 Nov 2023 05:32:38 +0800
+Subject: [PATCH] LoongArch: Use finer-grained DBAR hints
+
+LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]:
+
+- Bit 4: kind of constraint (0: completion, 1: ordering)
+- Bit 3: barrier for previous read (0: true, 1: false)
+- Bit 2: barrier for previous write (0: true, 1: false)
+- Bit 1: barrier for succeeding read (0: true, 1: false)
+- Bit 0: barrier for succeeding write (0: true, 1: false)
+
+LLVM has already utilized them for different memory orders [3]:
+
+- Bit 4 is always set to one because it's only intended to be zero for
+  things like MMIO devices, which are out of the scope of memory orders.
+- An acquire barrier is used to implement acquire loads like
+
+    ld.d $a1, $t0, 0
+    dbar acquire_hint
+
+  where the load operation (ld.d) should not be reordered with any load
+  or store operation after the acquire load.  To accomplish this
+  constraint, we need to prevent the load operation from being reordered
+  after the barrier, and also prevent any following load/store operation
+  from being reordered before the barrier.  Thus bits 0, 1, and 3 must
+  be zero, and bit 2 can be one, so acquire_hint should be 0b10100.
+- A release barrier is used to implement release stores like
+
+    dbar release_hint
+    st.d $a1, $t0, 0
+
+  where the store operation (st.d) should not be reordered with any load
+  or store operation before the release store.  So we need to prevent
+  the store operation from being reordered before the barrier, and also
+  prevent any preceding load/store operation from being reordered after
+  the barrier.  So bits 0, 2, 3 must be zero, and bit 1 can be one.  So
+  release_hint should be 0b10010.
+
+A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux
+kernel [1], and LoongArch LLVM [3].  So the mapping should be correct.
+And I've also bootstrapped & regtested GCC on a LA664 with this patch.
+
+The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can
+unconditionally emit the new hints without a compiler switch.
+
+[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
+[2]: https://github.com/loongson-community/docs/pull/12
+[3]: https://github.com/llvm/llvm-project/pull/68787
+[4]: https://gcc.gnu.org/r14-406
+
+gcc/ChangeLog:
+
+	* config/loongarch/sync.md (mem_thread_fence): Remove redundant
+	check.
+	(mem_thread_fence_1): Emit finer-grained DBAR hints for
+	different memory models, instead of 0.
+
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++-------
+ 1 file changed, 42 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 9924d522bcd..1ad0c63e0d9 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -50,23 +50,56 @@
+  [(match_operand:SI 0 "const_int_operand" "")] ;; model
+   ""
+ {
+-  if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
+-    {
+-      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+-      MEM_VOLATILE_P (mem) = 1;
+-      emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
+-    }
++  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (mem) = 1;
++  emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
++
+   DONE;
+ })
+ 
+-;; Until the LoongArch memory model (hence its mapping from C++) is finalized,
+-;; conservatively emit a full FENCE.
++;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
++;; the Linux patch revealing it [1]:
++;;
++;; - Bit 4: kind of constraint (0: completion, 1: ordering)
++;; - Bit 3: barrier for previous read (0: true, 1: false)
++;; - Bit 2: barrier for previous write (0: true, 1: false)
++;; - Bit 1: barrier for succeeding read (0: true, 1: false)
++;; - Bit 0: barrier for succeeding write (0: true, 1: false)
++;;
++;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
++;;
++;; Implementations without support for the finer-granularity hints simply treat
++;; all as the full barrier (DBAR 0), so we can unconditionally start emitting the
++;; more precise hints right away.
+ (define_insn "mem_thread_fence_1"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
+   (match_operand:SI 1 "const_int_operand" "")] ;; model
+   ""
+-  "dbar\t0")
++  {
++    enum memmodel model = memmodel_base (INTVAL (operands[1]));
++
++    switch (model)
++      {
++      case MEMMODEL_ACQUIRE:
++	return "dbar\t0b10100";
++      case MEMMODEL_RELEASE:
++	return "dbar\t0b10010";
++      case MEMMODEL_ACQ_REL:
++      case MEMMODEL_SEQ_CST:
++	return "dbar\t0b10000";
++      default:
++	/* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions
++	   need to be issued and this expansion is not invoked."
++
++	   __atomic builtins doc: "Consume is implemented using the
++	   stronger acquire memory order because of a deficiency in C++11's
++	   semantics."  See PR 59448 and get_memmodel in builtins.cc.
++
++	   Other values should not be returned by memmodel_base.  */
++	gcc_unreachable ();
++      }
++  })
+ 
+ ;; Atomic memory operations.
+ 
+-- 
+2.33.0
+