开源软件构建与测试

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

Changes of Revision 19

_service:tar_scm:gcc.spec Changed

@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 4
+%global gcc_release 8
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -18,7 +18,7 @@
 %global build_go 0
 %global build_d 0
 
-%ifarch %{ix86} x86_64 ia64 ppc64le
+%ifarch %{ix86} x86_64 ia64 ppc64le aarch64
 %global build_libquadmath 1
 %else
 %global build_libquadmath 0
@@ -136,12 +136,27 @@
 Provides: bundled(libffi)
 Provides: gcc(major) = %{gcc_major}
 
-Patch0: 0000-Version-Set-version-to-12.3.1.patch
-Patch1: 0001-CONFIG-Regenerate-configure-file.patch
-Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch
-
-Patch8: 0008-RISCV-Inline-subword-atomic-ops.patch
-Patch9: 0009-riscv-linux-Don-t-add-latomic-with-pthread.patch
+Patch1: 0001-Version-Set-version-to-12.3.1.patch
+Patch2: 0002-RISCV-Backport-inline-subword-atomic-patches.patch
+Patch3: 0003-CONFIG-Regenerate-configure-file.patch
+Patch4: 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch
+Patch6: 0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch
+Patch7: 0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch
+Patch8: 0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
+Patch9: 0009-MULL64-Disable-mull64-transformation-by-default.patch
+Patch10: 0010-Version-Clear-DATESTAMP_s.patch
+Patch11: 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch
+Patch12: 0012-Enable-small-loop-unrolling-for-O2.patch
+Patch13: 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch
+Patch14: 0014-Array-widen-compare-Add-a-new-optimization-for-array.patch
+Patch15: 0015-Backport-Structure-reorganization-optimization.patch
+Patch16: 0016-CompleteStructRelayout-Complete-Structure-Relayout.patch
+Patch17: 0017-StructReorg-Some-bugfix-for-structure-reorganization.patch
+Patch18: 0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
+Patch19: 0019-fp-model-Enable-fp-model-on-kunpeng.patch
+Patch20: 0020-simdmath-Enable-simdmath-on-kunpeng.patch
+Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch
+Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
 
 # On ARM EABI systems, we do want -gnueabi to be part of the
 # target triple.
@@ -609,12 +624,27 @@
 %prep
 %setup -q -n gcc-12.3.0
 
-%patch0 -p1
 %patch1 -p1
 %patch2 -p1
-
+%patch3 -p1
+%patch4 -p1
+%patch6 -p1
+%patch7 -p1
 %patch8 -p1
 %patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%patch16 -p1
+%patch17 -p1
+%patch18 -p1
+%patch19 -p1
+%patch20 -p1
+%patch21 -p1
+%patch22 -p1
 
 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
 
@@ -2718,6 +2748,26 @@
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Tue Aug 29 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-8
+- Type: Sync
+- DESC: Sync patch from openeuler/gcc
+
+* Fri Aug 11 2023 Hongyu Wang <hongyu.wang@intel.com> 12.3.1-7
+- Type:Sync
+- i386: Only enable small loop unrolling in backend PR 107692.
+
+* Fri Aug 11 2023 Hongyu Wang <hongyu.wang@intel.com> 12.3.1-6
+- Type:Sync
+- Enable small loop unrolling for O2.
+
+* Fri Aug 11 2023 Cui,Lili <lili.cui@intel.com> 12.3.1-5
+- Type:Sync
+- Add attribute hot judgement for INLINE_HINT_known_hot hint.
+
+* Mon Jul 17 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-4
+- Type:SPEC
+- DESC:Enable libquadmath on kunpeng
+
 * Fri Jul 14 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-3
 - Type:Sync
 - DESC:Enable libquadmath on kunpeng

_service:tar_scm:0001-CONFIG-Regenerate-configure-file.patch Deleted

@@ -1,45 +0,0 @@
-From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001
-From: eastb233 <xiezhiheng@huawei.com>
-Date: Fri, 14 Jul 2023 11:07:05 +0800
-Subject: PATCH 1/2 CONFIG Regenerate configure file
-
-Regenerate configure file under libquadmath directory
-since it is out of date.
----
- libquadmath/configure | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/libquadmath/configure b/libquadmath/configure
-index b3ee64f9c..603f2f131 100755
---- a/libquadmath/configure
-+++ b/libquadmath/configure
-@@ -10806,7 +10806,7 @@ else
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <<_LT_EOF
--#line 10819 "configure"
-+#line 10809 "configure"
- #include "confdefs.h"
- 
- #if HAVE_DLFCN_H
-@@ -10912,7 +10912,7 @@ else
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <<_LT_EOF
--#line 10925 "configure"
-+#line 10915 "configure"
- #include "confdefs.h"
- 
- #if HAVE_DLFCN_H
-@@ -13031,7 +13031,7 @@ case "$host" in
-     case "$enable_cet" in
-       auto)
- 	# Check if target supports multi-byte NOPs
--	# and if assembler supports CET insn.
-+	# and if compiler and assembler support CET insn.
- 	cet_save_CFLAGS="$CFLAGS"
- 	CFLAGS="$CFLAGS -fcf-protection"
- 	cat confdefs.h - <<_ACEOF >conftest.$ac_ext
--- 
-2.33.0
-

_service:tar_scm:0000-Version-Set-version-to-12.3.1.patch -> _service:tar_scm:0001-Version-Set-version-to-12.3.1.patch Changed

_service:tar_scm:0002-RISCV-Backport-inline-subword-atomic-patches.patch Added

@@ -0,0 +1,2042 @@
+From 123615a0aac59a731516ef11c1fe433d39b6573a Mon Sep 17 00:00:00 2001
+From: laokz <zhangkai@iscas.ac.cn>
+Date: Tue, 11 Jul 2023 21:03:14 +0800
+Subject: [PATCH 02/22] [RISCV] Backport inline subword atomic patches
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f797260adaf52bee0ec0e16190bbefbe1bfc3692
+
+203f3060dd363361b172f7295f42bb6bf5ac0b3b
+---
+ gcc/config/riscv/linux.h                      |  10 -
+ gcc/config/riscv/riscv-protos.h               |   2 +
+ gcc/config/riscv/riscv.cc                     |  49 ++
+ gcc/config/riscv/riscv.opt                    |   4 +
+ gcc/config/riscv/sync.md                      | 301 +++++++++
+ gcc/doc/invoke.texi                           |  10 +-
+ .../gcc.target/riscv/inline-atomics-1.c       |  18 +
+ .../gcc.target/riscv/inline-atomics-2.c       |   9 +
+ .../gcc.target/riscv/inline-atomics-3.c       | 569 ++++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-4.c       | 566 +++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-5.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-6.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-7.c       |  69 +++
+ .../gcc.target/riscv/inline-atomics-8.c       |  69 +++
+ libgcc/config/riscv/atomic.c                  |   2 +
+ 15 files changed, 1841 insertions(+), 11 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+
+diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
+index 38803723b..b5c6c5027 100644
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3.  If not see
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1"
+ 
+-/* Because RISC-V only has word-sized atomics, it requries libatomic where
+-   others do not.  So link libatomic by default, as needed.  */
+-#undef LIB_SPEC
+-#ifdef LD_AS_NEEDED_OPTION
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
+-  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
+-#else
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
+-#endif
+-
+ #define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+ 
+ #define CPP_SPEC "%{pthread:-D_REENTRANT}"
+diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
+index 65bb85f55..3b039e00d 100644
+--- a/gcc/config/riscv/riscv-protos.h
++++ b/gcc/config/riscv/riscv-protos.h
+@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx);
+ extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
+ extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
+ extern bool riscv_gpr_save_operation_p (rtx);
++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
+ 
+ /* Routines implemented in riscv-c.cc.  */
+ void riscv_cpu_cpp_builtins (cpp_reader *);
+diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
+index 4939d9964..9cf79beba 100644
+--- a/gcc/config/riscv/riscv.cc
++++ b/gcc/config/riscv/riscv.cc
+@@ -5605,6 +5605,55 @@ riscv_asan_shadow_offset (void)
+   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0;
+ }
+ 
++/* Given memory reference MEM, expand code to compute the aligned
++   memory address, shift and mask values and store them into
++   *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK.  */
++
++void
++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
++		       rtx *not_mask)
++{
++  /* Align the memory address to a word.  */
++  rtx addr = force_reg (Pmode, XEXP (mem, 0));
++
++  rtx addr_mask = gen_int_mode (-4, Pmode);
++
++  rtx aligned_addr = gen_reg_rtx (Pmode);
++  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
++
++  *aligned_mem = change_address (mem, SImode, aligned_addr);
++
++  /* Calculate the shift amount.  */
++  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
++				       gen_int_mode (3, SImode)));
++  emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
++					  gen_int_mode (3, SImode)));
++
++  /* Calculate the mask.  */
++  int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
++
++  emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
++
++  emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
++					 gen_lowpart (QImode, *shift)));
++
++  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
++}
++
++/* Leftshift a subword within an SImode register.  */
++
++void
++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
++		      rtx *shifted_value)
++{
++  rtx value_reg = gen_reg_rtx (SImode);
++  emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
++						  mode, 0));
++
++  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
++						 gen_lowpart (QImode, shift)));
++}
++
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
+index 492aad123..328d848d6 100644
+--- a/gcc/config/riscv/riscv.opt
++++ b/gcc/config/riscv/riscv.opt
+@@ -225,3 +225,7 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213)
+ misa-spec=
+ Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC)
+ Set the version of RISC-V ISA spec.
++
++minline-atomics
++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
++Always inline subword atomic operations.
+diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
+index 86b41e6b0..9c4fbabc6 100644
+--- a/gcc/config/riscv/sync.md
++++ b/gcc/config/riscv/sync.md
+@@ -21,8 +21,11 @@
+ 
+ (define_c_enum "unspec" 
+   UNSPEC_COMPARE_AND_SWAP
++  UNSPEC_COMPARE_AND_SWAP_SUBWORD
+   UNSPEC_SYNC_OLD_OP
++  UNSPEC_SYNC_OLD_OP_SUBWORD
+   UNSPEC_SYNC_EXCHANGE
++  UNSPEC_SYNC_EXCHANGE_SUBWORD
+   UNSPEC_ATOMIC_STORE
+   UNSPEC_MEMORY_BARRIER
+ )
+@@ -92,6 +95,135 @@
+   "%F3amo<insn>.<amo>%A3 %0,%z2,%1"
+   (set (attr "length") (const_int 8)))
+ 
++(define_insn "subword_atomic_fetch_strong_<atomic_optab>"
++  (set (match_operand:SI 0 "register_operand" "=&r")		   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))		   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  (any_atomic:SI (match_dup 1)
++		     (match_operand:SI 2 "register_operand" "rI")) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")	   ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")		   ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))			   ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))			   ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "<insn>\t%5, %0, %2\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  (set (attr "length") (const_int 28)))
++
++(define_expand "atomic_fetch_nand<mode>"
++  (match_operand:SHORT 0 "register_operand")			      ;; old value at mem
++   (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand")     ;; mem location
++			 (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op
++   (match_operand:SI 3 "const_int_operand")			      ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_nand to implement a LR/SC version of the
++     operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++

_service:tar_scm:0002-libquadmath-Enable-libquadmath-on-kunpeng.patch Deleted

@@ -1,197 +0,0 @@
-From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001
-From: eastb233 <xiezhiheng@huawei.com>
-Date: Fri, 14 Jul 2023 11:10:24 +0800
-Subject: PATCH 2/2 libquadmath Enable libquadmath on kunpeng
-
-This enable libquadmath on kunpeng platform to convenient
-users that migrating from x86 platform. libquadmath uses "__float128"
-as quad precision floating point type and with math functions with "q"
-suffix like "cosq". For those who do not need to adapt to x86 platform,
-you can use "long double" as quad precision floating point type and math
-functions with "l" suffix like "cosl" in libm for quad precision math.
----
- libquadmath/Makefile.am  |  4 ++++
- libquadmath/Makefile.in  |  3 ++-
- libquadmath/configure    | 28 ++++++++++++++++++++++++++--
- libquadmath/configure.ac |  7 +++++++
- libquadmath/quadmath.h   | 13 +++++++++++--
- 5 files changed, 50 insertions(+), 5 deletions(-)
-
-diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am
-index 35dffb46f..bf0398d9c 100644
---- a/libquadmath/Makefile.am
-+++ b/libquadmath/Makefile.am
-@@ -2,6 +2,10 @@
- 
- AUTOMAKE_OPTIONS = foreign info-in-builddir
- 
-+if ARCH_AARCH64
-+DEFS += -D__float128="long double"
-+endif
-+
- ## Skip over everything if the quadlib is not available:
- if BUILD_LIBQUADMATH
- ACLOCAL_AMFLAGS = -I .. -I ../config
-diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in
-index 8c0112122..449cc8a06 100644
---- a/libquadmath/Makefile.in
-+++ b/libquadmath/Makefile.in
-@@ -90,6 +90,7 @@ POST_UNINSTALL = :
- build_triplet = @build@
- host_triplet = @host@
- target_triplet = @target@
-+@ARCH_AARCH64_TRUE@am__append_1 = -D__float128="long double"
- @BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
- subdir = .
- ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-@@ -337,7 +338,7 @@ CFLAGS = @CFLAGS@
- CPP = @CPP@
- CPPFLAGS = @CPPFLAGS@
- CYGPATH_W = @CYGPATH_W@
--DEFS = @DEFS@
-+DEFS = @DEFS@ $(am__append_1)
- DEPDIR = @DEPDIR@
- DSYMUTIL = @DSYMUTIL@
- DUMPBIN = @DUMPBIN@
-diff --git a/libquadmath/configure b/libquadmath/configure
-index 603f2f131..13a9088fb 100755
---- a/libquadmath/configure
-+++ b/libquadmath/configure
-@@ -633,6 +633,8 @@ am__EXEEXT_TRUE
- LTLIBOBJS
- LIBOBJS
- get_gcc_base_ver
-+ARCH_AARCH64_FALSE
-+ARCH_AARCH64_TRUE
- GENINSRC_FALSE
- GENINSRC_TRUE
- XCFLAGS
-@@ -10806,7 +10808,7 @@ else
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <<_LT_EOF
--#line 10809 "configure"
-+#line 10811 "configure"
- #include "confdefs.h"
- 
- #if HAVE_DLFCN_H
-@@ -10912,7 +10914,7 @@ else
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <<_LT_EOF
--#line 10915 "configure"
-+#line 10917 "configure"
- #include "confdefs.h"
- 
- #if HAVE_DLFCN_H
-@@ -12705,6 +12707,11 @@ else
-   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
- /* end confdefs.h.  */
- 
-+    #if defined(__aarch64__)
-+    typedef long double __float128;
-+    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
-+    #endif
-+
-     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
-     typedef _Complex float __attribute__((mode(TC))) __complex128;
-     #else
-@@ -12756,6 +12763,11 @@ fi
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
- /* end confdefs.h.  */
- 
-+    #if defined(__aarch64__)
-+    typedef long double __float128;
-+    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
-+    #endif
-+
-     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
-     typedef _Complex float __attribute__((mode(TC))) __complex128;
-     #else
-@@ -13242,6 +13254,14 @@ else
-   GENINSRC_FALSE=
- fi
- 
-+ if expr "$target_cpu" : "aarch64.*" > /dev/null; then
-+  ARCH_AARCH64_TRUE=
-+  ARCH_AARCH64_FALSE='#'
-+else
-+  ARCH_AARCH64_TRUE='#'
-+  ARCH_AARCH64_FALSE=
-+fi
-+
- 
- # Determine what GCC version number to use in filesystem paths.
- 
-@@ -13425,6 +13445,10 @@ if test -z "${GENINSRC_TRUE}" && test -z "${GENINSRC_FALSE}"; then
-   as_fn_error $? "conditional \"GENINSRC\" was never defined.
- Usually this means the macro was only invoked conditionally." "$LINENO" 5
- fi
-+if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then
-+  as_fn_error $? "conditional \"ARCH_AARCH64\" was never defined.
-+Usually this means the macro was only invoked conditionally." "$LINENO" 5
-+fi
- 
- : "${CONFIG_STATUS=./config.status}"
- ac_write_fail=0
-diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
-index eec4084a4..507c247d6 100644
---- a/libquadmath/configure.ac
-+++ b/libquadmath/configure.ac
-@@ -218,6 +218,11 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, test "x$quadmath_use_symver" = xsun)
- 
- AC_CACHE_CHECK(whether __float128 is supported, libquad_cv_have_float128,
-   GCC_TRY_COMPILE_OR_LINK(
-+    #if defined(__aarch64__)
-+    typedef long double __float128;
-+    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
-+    #endif
-+
-     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
-     typedef _Complex float __attribute__((mode(TC))) __complex128;
-     #else
-@@ -393,6 +398,8 @@ AS_HELP_STRING(--enable-generated-files-in-srcdir,
- enable_generated_files_in_srcdir=no)
- AC_MSG_RESULT($enable_generated_files_in_srcdir)
- AM_CONDITIONAL(GENINSRC, test "$enable_generated_files_in_srcdir" = yes)
-+AM_CONDITIONAL(ARCH_AARCH64,
-+               expr "$target_cpu" : "aarch64.*" > /dev/null)
- 
- # Determine what GCC version number to use in filesystem paths.
- GCC_BASE_VER
-diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h
-index 81eb957d2..bb1b49df6 100644
---- a/libquadmath/quadmath.h
-+++ b/libquadmath/quadmath.h
-@@ -27,6 +27,12 @@ Boston, MA 02110-1301, USA.  */
- extern "C" {
- #endif
- 
-+#if defined(__aarch64__)
-+#ifndef __float128
-+typedef long double __float128;
-+#endif
-+#endif
-+
- /* Define the complex type corresponding to __float128
-    ("_Complex __float128" is not allowed) */
- #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
-@@ -160,10 +166,13 @@ extern int quadmath_snprintf (char *str, size_t size,
- #define FLT128_MAX_10_EXP 4932
- 
- 
--#define HUGE_VALQ __builtin_huge_valq()
-+#if defined(__aarch64__)
- /* The following alternative is valid, but brings the warning:
-    (floating constant exceeds range of ‘__float128’)  */
--/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
-+# define HUGE_VALQ (__extension__ 0x1.0p32767Q)
-+#else
-+# define HUGE_VALQ __builtin_huge_valq()
-+#endif
- 
- #define M_Eq		2.718281828459045235360287471352662498Q  /* e */
- #define M_LOG2Eq	1.442695040888963407359924681001892137Q  /* log_2 e */
--- 
-2.33.0
-

_service:tar_scm:0003-CONFIG-Regenerate-configure-file.patch Added

@@ -0,0 +1,45 @@
+From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Fri, 14 Jul 2023 11:07:05 +0800
+Subject: [PATCH 03/22] [CONFIG] Regenerate configure file
+
+Regenerate configure file under libquadmath directory
+since it is out of date.
+---
+ libquadmath/configure | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/libquadmath/configure b/libquadmath/configure
+index b3ee64f9c..603f2f131 100755
+--- a/libquadmath/configure
++++ b/libquadmath/configure
+@@ -10806,7 +10806,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 10819 "configure"
++#line 10809 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -10912,7 +10912,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 10925 "configure"
++#line 10915 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -13031,7 +13031,7 @@ case "$host" in
+     case "$enable_cet" in
+       auto)
+ 	# Check if target supports multi-byte NOPs
+-	# and if assembler supports CET insn.
++	# and if compiler and assembler support CET insn.
+ 	cet_save_CFLAGS="$CFLAGS"
+ 	CFLAGS="$CFLAGS -fcf-protection"
+ 	cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+-- 
+2.33.0
+

_service:tar_scm:0004-libquadmath-Enable-libquadmath-on-kunpeng.patch Added

@@ -0,0 +1,197 @@
+From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Fri, 14 Jul 2023 11:10:24 +0800
+Subject: [PATCH 04/22] [libquadmath] Enable libquadmath on kunpeng
+
+This enables libquadmath on the kunpeng platform for the convenience of
+users who are migrating from the x86 platform. libquadmath uses "__float128"
+as the quad precision floating point type and provides math functions with a "q"
+suffix like "cosq". Those who do not need to stay compatible with the x86 platform
+can use "long double" as the quad precision floating point type and the math
+functions with an "l" suffix like "cosl" in libm for quad precision math.
+---
+ libquadmath/Makefile.am  |  4 ++++
+ libquadmath/Makefile.in  |  3 ++-
+ libquadmath/configure    | 28 ++++++++++++++++++++++++++--
+ libquadmath/configure.ac |  7 +++++++
+ libquadmath/quadmath.h   | 13 +++++++++++--
+ 5 files changed, 50 insertions(+), 5 deletions(-)
+
+diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am
+index 35dffb46f..bf0398d9c 100644
+--- a/libquadmath/Makefile.am
++++ b/libquadmath/Makefile.am
+@@ -2,6 +2,10 @@
+ 
+ AUTOMAKE_OPTIONS = foreign info-in-builddir
+ 
++if ARCH_AARCH64
++DEFS += -D__float128="long double"
++endif
++
+ ## Skip over everything if the quadlib is not available:
+ if BUILD_LIBQUADMATH
+ ACLOCAL_AMFLAGS = -I .. -I ../config
+diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in
+index 8c0112122..449cc8a06 100644
+--- a/libquadmath/Makefile.in
++++ b/libquadmath/Makefile.in
+@@ -90,6 +90,7 @@ POST_UNINSTALL = :
+ build_triplet = @build@
+ host_triplet = @host@
+ target_triplet = @target@
++@ARCH_AARCH64_TRUE@am__append_1 = -D__float128="long double"
+ @BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
+ subdir = .
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+@@ -337,7 +338,7 @@ CFLAGS = @CFLAGS@
+ CPP = @CPP@
+ CPPFLAGS = @CPPFLAGS@
+ CYGPATH_W = @CYGPATH_W@
+-DEFS = @DEFS@
++DEFS = @DEFS@ $(am__append_1)
+ DEPDIR = @DEPDIR@
+ DSYMUTIL = @DSYMUTIL@
+ DUMPBIN = @DUMPBIN@
+diff --git a/libquadmath/configure b/libquadmath/configure
+index 603f2f131..13a9088fb 100755
+--- a/libquadmath/configure
++++ b/libquadmath/configure
+@@ -633,6 +633,8 @@ am__EXEEXT_TRUE
+ LTLIBOBJS
+ LIBOBJS
+ get_gcc_base_ver
++ARCH_AARCH64_FALSE
++ARCH_AARCH64_TRUE
+ GENINSRC_FALSE
+ GENINSRC_TRUE
+ XCFLAGS
+@@ -10806,7 +10808,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 10809 "configure"
++#line 10811 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -10912,7 +10914,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 10915 "configure"
++#line 10917 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -12705,6 +12707,11 @@ else
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+ 
++    #if defined(__aarch64__)
++    typedef long double __float128;
++    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
++    #endif
++
+     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
+     typedef _Complex float __attribute__((mode(TC))) __complex128;
+     #else
+@@ -12756,6 +12763,11 @@ fi
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+ 
++    #if defined(__aarch64__)
++    typedef long double __float128;
++    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
++    #endif
++
+     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
+     typedef _Complex float __attribute__((mode(TC))) __complex128;
+     #else
+@@ -13242,6 +13254,14 @@ else
+   GENINSRC_FALSE=
+ fi
+ 
++ if expr "$target_cpu" : "aarch64.*" > /dev/null; then
++  ARCH_AARCH64_TRUE=
++  ARCH_AARCH64_FALSE='#'
++else
++  ARCH_AARCH64_TRUE='#'
++  ARCH_AARCH64_FALSE=
++fi
++
+ 
+ # Determine what GCC version number to use in filesystem paths.
+ 
+@@ -13425,6 +13445,10 @@ if test -z "${GENINSRC_TRUE}" && test -z "${GENINSRC_FALSE}"; then
+   as_fn_error $? "conditional \"GENINSRC\" was never defined.
+ Usually this means the macro was only invoked conditionally." "$LINENO" 5
+ fi
++if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then
++  as_fn_error $? "conditional \"ARCH_AARCH64\" was never defined.
++Usually this means the macro was only invoked conditionally." "$LINENO" 5
++fi
+ 
+ : "${CONFIG_STATUS=./config.status}"
+ ac_write_fail=0
+diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
+index eec4084a4..507c247d6 100644
+--- a/libquadmath/configure.ac
++++ b/libquadmath/configure.ac
+@@ -218,6 +218,11 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, test "x$quadmath_use_symver" = xsun)
+ 
+ AC_CACHE_CHECK(whether __float128 is supported, libquad_cv_have_float128,
+   GCC_TRY_COMPILE_OR_LINK(
++    #if defined(__aarch64__)
++    typedef long double __float128;
++    #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q)
++    #endif
++
+     #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
+     typedef _Complex float __attribute__((mode(TC))) __complex128;
+     #else
+@@ -393,6 +398,8 @@ AS_HELP_STRING(--enable-generated-files-in-srcdir,
+ enable_generated_files_in_srcdir=no)
+ AC_MSG_RESULT($enable_generated_files_in_srcdir)
+ AM_CONDITIONAL(GENINSRC, test "$enable_generated_files_in_srcdir" = yes)
++AM_CONDITIONAL(ARCH_AARCH64,
++               expr "$target_cpu" : "aarch64.*" > /dev/null)
+ 
+ # Determine what GCC version number to use in filesystem paths.
+ GCC_BASE_VER
+diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h
+index 81eb957d2..bb1b49df6 100644
+--- a/libquadmath/quadmath.h
++++ b/libquadmath/quadmath.h
+@@ -27,6 +27,12 @@ Boston, MA 02110-1301, USA.  */
+ extern "C" {
+ #endif
+ 
++#if defined(__aarch64__)
++#ifndef __float128
++typedef long double __float128;
++#endif
++#endif
++
+ /* Define the complex type corresponding to __float128
+    ("_Complex __float128" is not allowed) */
+ #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
+@@ -160,10 +166,13 @@ extern int quadmath_snprintf (char *str, size_t size,
+ #define FLT128_MAX_10_EXP 4932
+ 
+ 
+-#define HUGE_VALQ __builtin_huge_valq()
++#if defined(__aarch64__)
+ /* The following alternative is valid, but brings the warning:
+    (floating constant exceeds range of ‘__float128’)  */
+-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
++# define HUGE_VALQ (__extension__ 0x1.0p32767Q)
++#else
++# define HUGE_VALQ __builtin_huge_valq()
++#endif
+ 
+ #define M_Eq		2.718281828459045235360287471352662498Q  /* e */
+ #define M_LOG2Eq	1.442695040888963407359924681001892137Q  /* log_2 e */
+-- 
+2.33.0
+

_service:tar_scm:0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch Added

@@ -0,0 +1,89 @@
+From e7013d2640d82e928ebdaf830b6833051ac65296 Mon Sep 17 00:00:00 2001
+From: zhongyunde <zhongyunde@huawei.com>
+Date: Sat, 5 Nov 2022 13:22:33 +0800
+Subject: [PATCH 06/22] [MULL64 1/3] Add A ? B op CST : B match and simplify
+ optimizations
+
+    Refer to commit b6bdd7a4; use pattern matching to implement
+    A ? B op CST : B (where CST is a power of 2) simplifications.
+    Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
+
+    gcc/
+            * match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
+
+    gcc/testsuite/
+            * gcc.dg/pr107190.c: New test.
+---
+ gcc/match.pd                    | 21 +++++++++++++++++++++
+ gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
+ 2 files changed, 48 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index fc2833bbd..fd0857fc9 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4280,6 +4280,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+ 
++#if GIMPLE
++(if (canonicalize_math_p ())
++/* These patterns are mostly used by PHIOPT to move some operations outside of
++   the if statements. They should be done late because it gives jump threading
++   and few other passes to reduce what is going on.  */
++/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
++  (simplify
++   (cond @0 (op:s @1 integer_pow2p@2) @1)
++    /* powerof2cst */
++   (if (INTEGRAL_TYPE_P (type))
++    (with {
++      tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
++     }
++     (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
++   )
++  )
++ )
++)
++#endif
++
+ /* Simplification moved from fold_cond_expr_with_comparison.  It may also
+    be extended.  */
+ /* This pattern implements two kinds simplification:
+diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
+new file mode 100644
+index 000000000..235b2761a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/pr107190.c
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
++
++#  define BN_BITS4        32
++#  define BN_MASK2        (0xffffffffffffffffL)
++#  define BN_MASK2l       (0xffffffffL)
++#  define BN_MASK2h       (0xffffffff00000000L)
++#  define BN_MASK2h1      (0xffffffff80000000L)
++#  define LBITS(a)        ((a)&BN_MASK2l)
++#  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
++#  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)
++
++unsigned int test_m(unsigned long in0, unsigned long in1) {
++    unsigned long m, m1, lt, ht, bl, bh;
++    lt = LBITS(in0);
++    ht = HBITS(in0);
++    bl = LBITS(in1);
++    bh = HBITS(in1);
++    m  = bh * lt;
++    m1 = bl * ht;
++    ht = bh * ht;
++    m  = (m + m1) & BN_MASK2;
++    if (m < m1) ht += L2HBITS((unsigned long)1);
++    return ht + m;
++}
++
++/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
+-- 
+2.33.0
+

_service:tar_scm:0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch Added

@@ -0,0 +1,130 @@
+From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001
+From: zhongyunde <zhongyunde@huawei.com>
+Date: Wed, 9 Nov 2022 17:04:13 +0800
+Subject: PATCH 07/22 MULL64 2/3 Fold series of instructions into mul
+
+    Merge the low part of series instructions into mul
+
+    gcc/
+            * match.pd: Add simplifications for low part of mul.
+            * common.opt: Add new option -fmerge-mull, enabled at -O2.
+            * opts.cc (default_options_table): Enable -fmerge-mull at -O2 and above.
+
+    gcc/testsuite/
+            * g++.dg/tree-ssa/mull64.C: New test.
+---
+ gcc/common.opt                         |  4 +++
+ gcc/match.pd                           | 27 ++++++++++++++++++++
+ gcc/opts.cc                            |  1 +
+ gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
+ 4 files changed, 66 insertions(+)
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 8a0dafc52..e365a48bc 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2126,6 +2126,10 @@ fmerge-debug-strings
+ Common Var(flag_merge_debug_strings) Init(1)
+ Attempt to merge identical debug strings across compilation units.
+ 
++fmerge-mull
++Common Var(flag_merge_mull) Init(0) Optimization
++Attempt to merge series instructions into mul.
++
+ fmessage-length=
+ Common RejectNegative Joined UInteger
+ -fmessage-length=<number>	Limit diagnostics to <number> characters per line.  0 suppresses line-wrapping.
+diff --git a/gcc/match.pd b/gcc/match.pd
+index fd0857fc9..2092e6959 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+ 
++#if GIMPLE
++/* These patterns are mostly used by FORWPROP1 to fold some operations into more
++   simple IR. The following scenario should be matched:
++    In0Lo = In0(D) & 4294967295;
++    In0Hi = In0(D) >> 32;
++    In1Lo = In1(D) & 4294967295;
++    In1Hi = In1(D) >> 32;
++    Addc = In0Lo * In1Hi + In0Hi * In1Lo;
++    addc32 = Addc << 32;
++    ResLo = In0Lo * In1Lo + addc32  */
++(simplify
++ (plus:c (mult @4 @5)
++  (lshift
++   (plus:c
++    (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
++    (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
++   INTEGER_CST@3
++  )
++ )
++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
++      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
++      && TYPE_PRECISION (type) == 64)
++  (mult (convert:type @0)  (convert:type @1))
++ )
++)
++#endif
++
+ /* Simplification moved from fold_cond_expr_with_comparison.  It may also
+    be extended.  */
+ /* This pattern implements two kinds simplification:
+diff --git a/gcc/opts.cc b/gcc/opts.cc
+index a97630d1c..eae71ed20 100644
+--- a/gcc/opts.cc
++++ b/gcc/opts.cc
+@@ -647,6 +647,7 @@ static const struct default_options default_options_table =
+       VECT_COST_MODEL_VERY_CHEAP },
+     { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
+     { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
++    { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
+ 
+     /* -O2 and above optimizations, but not -Os or -Og.  */
+     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+new file mode 100644
+index 000000000..2a3b74604
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+@@ -0,0 +1,34 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
++
++#  define BN_BITS4        32
++#  define BN_MASK2        (0xffffffffffffffffL)
++#  define BN_MASK2l       (0xffffffffL)
++#  define BN_MASK2h       (0xffffffff00000000L)
++#  define BN_MASK2h1      (0xffffffff80000000L)
++#  define LBITS(a)        ((a)&BN_MASK2l)
++#  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
++#  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)
++
++void mul64(unsigned long in0, unsigned long in1,
++           unsigned long &retLo, unsigned long &retHi) {
++    unsigned long m00, m01, m10, m11, al, ah, bl, bh;
++    unsigned long Addc, addc32, low;
++    al = LBITS(in0);
++    ah = HBITS(in0);
++    bl = LBITS(in1);
++    bh = HBITS(in1);
++    m10 = bh * al;
++    m00 = bl * al;
++    m01 = bl * ah;
++    m11 = bh * ah;
++    Addc = (m10 + m01) & BN_MASK2;
++    if (Addc < m01) m11 += L2HBITS((unsigned long)1);
++    m11 += HBITS(Addc);
++    addc32 = L2HBITS(Addc);
++    low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
++    retLo = low;
++    retHi  = m11;
++}
++
++/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+-- 
+2.33.0
+

_service:tar_scm:0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch Added

@@ -0,0 +1,105 @@
+From 4e536dbb4a08925cea259be13962969efcc0f3c1 Mon Sep 17 00:00:00 2001
+From: zhongyunde <zhongyunde@huawei.com>
+Date: Fri, 11 Nov 2022 11:30:37 +0800
+Subject: PATCH 08/22 MULL64 3/3 Fold series of instructions into umulh
+
+    Merge the high part of series instructions into umulh
+
+    gcc/
+        * match.pd: Add simplifications for high part of umulh
+
+    gcc/testsuite/
+        * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
+---
+ gcc/match.pd                           | 56 ++++++++++++++++++++++++++
+ gcc/testsuite/g++.dg/tree-ssa/mull64.C |  5 ++-
+ 2 files changed, 59 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 2092e6959..b7e3588e8 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+ 
++#if GIMPLE
++/* These patterns are mostly used by FORWPROP4 to move some operations outside of
++   the if statements. They should be done late because it gives jump threading
++   and few other passes to reduce what is going on.  */
++/* Mul64 is defined as a multiplication algorithm which compute two 64-bit
++   integers to one 128-bit integer. Try to match the high part of mul pattern
++   after the low part of mul pattern is simplified. The following scenario
++   should be matched:
++  (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
++    In0Lo = In0(D) & 4294967295;        -- bit_and@4 SSA_NAME@0 @2
++    In0Hi = In0(D) >> 32;               -- rshift@5 SSA_NAME@0 @3
++    In1Lo = In1(D) & 4294967295;        -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
++    In1Hi = In1(D) >> 32;               -- rshift@7 SSA_NAME@1 INTEGER_CST@3
++    Mull_01 = In0Hi * In1Lo;            -- mult@8 @5 @6
++    Addc = In0Lo * In1Hi + Mull_01;     -- plus@9 (mult (@4 @7) @8
++    AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
++    addc32 = Addc << 32;                -- lshift@10 @9 @3
++    ResLo = In0(D) * In1(D);            -- mult @0 @1
++    ResHi = ((long unsigned int) (addc32 > ResLo)) +
++	        (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
++ } */
++(simplify
++ (plus:c
++  (plus:c
++   (convert
++    (gt (lshift@10 @9 @3)
++        (mult:c @0 @1)))
++   (lshift
++    (convert
++     (gt @8 @9))
++    @3))
++  (plus:c@11
++   (rshift
++    (plus:c@9
++     (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
++             (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
++    @3)
++   (mult:c (rshift@5 SSA_NAME@0 @3)
++           (rshift@7 SSA_NAME@1 INTEGER_CST@3))
++  )
++ )
++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
++      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
++      && TYPE_PRECISION (type) == 64)
++  (with {
++      tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
++      tree shift = build_int_cst (integer_type_node, 64);
++   }
++  (convert:type (rshift
++	         (mult (convert:i128_type @0)
++                       (convert:i128_type @1))
++                 { shift; })))
++ )
++)
++#endif
++
+ #if GIMPLE
+ /* These patterns are mostly used by FORWPROP1 to fold some operations into more
+    simple IR. The following scenario should be matched:
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+index 2a3b74604..f61cf5e6f 100644
+--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
+ 
+ #  define BN_BITS4        32
+ #  define BN_MASK2        (0xffffffffffffffffL)
+@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
+     retHi  = m11;
+ }
+ 
+-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
+-- 
+2.33.0
+

_service:tar_scm:0008-RISCV-Inline-subword-atomic-ops.patch Deleted

@@ -1,2057 +0,0 @@
-From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001
-From: Patrick O'Neill <patrick@rivosinc.com>
-Date: Tue, 18 Apr 2023 14:33:13 -0700
-Subject: PATCH RISCV: Inline subword atomic ops
-
-RISC-V has no support for subword atomic operations; code currently
-generates libatomic library calls.
-
-This patch changes the default behavior to inline subword atomic calls
-(using the same logic as the existing library call).
-Behavior can be specified using the -minline-atomics and
--mno-inline-atomics command line flags.
-
-gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm.
-This will need to stay for backwards compatibility and the
--mno-inline-atomics flag.
-
-2023-04-18 Patrick O'Neill <patrick@rivosinc.com>
-
-gcc/ChangeLog:
-	PR target/104338
-	* config/riscv/riscv-protos.h: Add helper function stubs.
-	* config/riscv/riscv.cc: Add helper functions for subword masking.
-	* config/riscv/riscv.opt: Add command-line flag.
-	* config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op,
-	fetch_and_nand, CAS, and exchange ops.
-	* doc/invoke.texi: Add blurb regarding command-line flag.
-
-libgcc/ChangeLog:
-	PR target/104338
-	* config/riscv/atomic.c: Add reference to duplicate logic.
-
-gcc/testsuite/ChangeLog:
-	PR target/104338
-	* gcc.target/riscv/inline-atomics-1.c: New test.
-	* gcc.target/riscv/inline-atomics-2.c: New test.
-	* gcc.target/riscv/inline-atomics-3.c: New test.
-	* gcc.target/riscv/inline-atomics-4.c: New test.
-	* gcc.target/riscv/inline-atomics-5.c: New test.
-	* gcc.target/riscv/inline-atomics-6.c: New test.
-	* gcc.target/riscv/inline-atomics-7.c: New test.
-	* gcc.target/riscv/inline-atomics-8.c: New test.
-
-Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
-Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
----
- gcc/config/riscv/riscv-protos.h               |   2 +
- gcc/config/riscv/riscv.cc                     |  49 ++
- gcc/config/riscv/riscv.opt                    |   4 +
- gcc/config/riscv/sync.md                      | 301 +++++++++
- gcc/doc/invoke.texi                           |  10 +-
- .../gcc.target/riscv/inline-atomics-1.c       |  18 +
- .../gcc.target/riscv/inline-atomics-2.c       |   9 +
- .../gcc.target/riscv/inline-atomics-3.c       | 569 ++++++++++++++++++
- .../gcc.target/riscv/inline-atomics-4.c       | 566 +++++++++++++++++
- .../gcc.target/riscv/inline-atomics-5.c       |  87 +++
- .../gcc.target/riscv/inline-atomics-6.c       |  87 +++
- .../gcc.target/riscv/inline-atomics-7.c       |  69 +++
- .../gcc.target/riscv/inline-atomics-8.c       |  69 +++
- libgcc/config/riscv/atomic.c                  |   2 +
- 14 files changed, 1841 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
-
-diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
-index 607ff6ea697..f87661bde2c 100644
---- a/gcc/config/riscv/riscv-protos.h
-+++ b/gcc/config/riscv/riscv-protos.h
-@@ -74,6 +74,8 @@
- extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
- extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
- extern bool riscv_gpr_save_operation_p (rtx);
-+extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
-+extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
- 
- /* Routines implemented in riscv-c.cc.  */
- void riscv_cpu_cpp_builtins (cpp_reader *);
-diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
-index a2d2dd0bb67..0f890469d7a 100644
---- a/gcc/config/riscv/riscv.cc
-+++ b/gcc/config/riscv/riscv.cc
-@@ -5605,6 +5605,55 @@
-   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0;
- }
- 
-+/* Given memory reference MEM, expand code to compute the aligned
-+   memory address, shift and mask values and store them into
-+   *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK.  */
-+
-+void
-+riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
-+		       rtx *not_mask)
-+{
-+  /* Align the memory address to a word.  */
-+  rtx addr = force_reg (Pmode, XEXP (mem, 0));
-+
-+  rtx addr_mask = gen_int_mode (-4, Pmode);
-+
-+  rtx aligned_addr = gen_reg_rtx (Pmode);
-+  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
-+
-+  *aligned_mem = change_address (mem, SImode, aligned_addr);
-+
-+  /* Calculate the shift amount.  */
-+  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
-+				       gen_int_mode (3, SImode)));
-+  emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
-+					  gen_int_mode (3, SImode)));
-+
-+  /* Calculate the mask.  */
-+  int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
-+
-+  emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
-+
-+  emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
-+					 gen_lowpart (QImode, *shift)));
-+
-+  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
-+}
-+
-+/* Leftshift a subword within an SImode register.  */
-+
-+void
-+riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
-+		      rtx *shifted_value)
-+{
-+  rtx value_reg = gen_reg_rtx (SImode);
-+  emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
-+						  mode, 0));
-+
-+  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
-+						 gen_lowpart (QImode, shift)));
-+}
-+
- /* Initialize the GCC target structure.  */
- #undef TARGET_ASM_ALIGNED_HI_OP
- #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
-diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
-index ef1bdfcfe28..63d4710cb15 100644
---- a/gcc/config/riscv/riscv.opt
-+++ b/gcc/config/riscv/riscv.opt
-@@ -225,3 +225,7 @@
- misa-spec=
- Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC)
- Set the version of RISC-V ISA spec.
-+
-+minline-atomics
-+Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
-+Always inline subword atomic operations.
-diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
-index c932ef87b9d..83be6431cb6 100644
---- a/gcc/config/riscv/sync.md
-+++ b/gcc/config/riscv/sync.md
-@@ -21,8 +21,11 @@
- 
- (define_c_enum "unspec" 
-   UNSPEC_COMPARE_AND_SWAP
-+  UNSPEC_COMPARE_AND_SWAP_SUBWORD
-   UNSPEC_SYNC_OLD_OP
-+  UNSPEC_SYNC_OLD_OP_SUBWORD
-   UNSPEC_SYNC_EXCHANGE
-+  UNSPEC_SYNC_EXCHANGE_SUBWORD
-   UNSPEC_ATOMIC_STORE
-   UNSPEC_MEMORY_BARRIER
- )
-@@ -92,6 +95,135 @@
-   "%F3amo<insn>.<amo>%A3 %0,%z2,%1"
-   (set (attr "length") (const_int 8)))
- 
-+(define_insn "subword_atomic_fetch_strong_<atomic_optab>"
-+  (set (match_operand:SI 0 "register_operand" "=&r")		   ;; old value at mem
-+	(match_operand:SI 1 "memory_operand" "+A"))		   ;; mem location
-+   (set (match_dup 1)
-+	(unspec_volatile:SI
-+	  (any_atomic:SI (match_dup 1)
-+		     (match_operand:SI 2 "register_operand" "rI")) ;; value for op
-+	   (match_operand:SI 3 "register_operand" "rI")	   ;; mask
-+	 UNSPEC_SYNC_OLD_OP_SUBWORD))
-+    (match_operand:SI 4 "register_operand" "rI")		   ;; not_mask
-+    (clobber (match_scratch:SI 5 "=&r"))			   ;; tmp_1
-+    (clobber (match_scratch:SI 6 "=&r"))			   ;; tmp_2
-+  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
-+  {
-+    return "1:\;"
-+	   "lr.w.aq\t%0, %1\;"
-+	   "<insn>\t%5, %0, %2\;"
-+	   "and\t%5, %5, %3\;"
-+	   "and\t%6, %0, %4\;"
-+	   "or\t%6, %6, %5\;"
-+	   "sc.w.rl\t%5, %6, %1\;"
-+	   "bnez\t%5, 1b";
-+  }
-+  (set (attr "length") (const_int 28)))

_service:tar_scm:0009-MULL64-Disable-mull64-transformation-by-default.patch Added

@@ -0,0 +1,66 @@
+From 7c1f4425c680ea144d29bc55a1283d46444a2691 Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Wed, 7 Dec 2022 09:43:15 +0800
+Subject: PATCH 09/22 MULL64 Disable mull64 transformation by default
+
+This commit disables the mull64 transformation by default since
+it causes runtime failures in some workloads.
+
+This is a workaround fix for https://gitee.com/src-openeuler/gcc/issues/I64UQH
+---
+ gcc/match.pd                           | 2 +-
+ gcc/opts.cc                            | 1 -
+ gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
+ gcc/testsuite/gcc.dg/pr107190.c        | 2 +-
+ 4 files changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index b7e3588e8..6f24d5079 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4290,7 +4290,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+   (simplify
+    (cond @0 (op:s @1 integer_pow2p@2) @1)
+     /* powerof2cst */
+-   (if (INTEGRAL_TYPE_P (type))
++   (if (flag_merge_mull && INTEGRAL_TYPE_P (type))
+     (with {
+       tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+      }
+diff --git a/gcc/opts.cc b/gcc/opts.cc
+index eae71ed20..a97630d1c 100644
+--- a/gcc/opts.cc
++++ b/gcc/opts.cc
+@@ -647,7 +647,6 @@ static const struct default_options default_options_table =
+       VECT_COST_MODEL_VERY_CHEAP },
+     { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
+     { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+-    { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
+ 
+     /* -O2 and above optimizations, but not -Os or -Og.  */
+     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+index f61cf5e6f..cad891e62 100644
+--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
++/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
+ 
+ #  define BN_BITS4        32
+ #  define BN_MASK2        (0xffffffffffffffffL)
+diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
+index 235b2761a..d1e72e5df 100644
+--- a/gcc/testsuite/gcc.dg/pr107190.c
++++ b/gcc/testsuite/gcc.dg/pr107190.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
++/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
+ 
+ #  define BN_BITS4        32
+ #  define BN_MASK2        (0xffffffffffffffffL)
+-- 
+2.33.0
+

_service:tar_scm:0009-riscv-linux-Don-t-add-latomic-with-pthread.patch Deleted

@@ -1,41 +0,0 @@
-From 203f3060dd363361b172f7295f42bb6bf5ac0b3b Mon Sep 17 00:00:00 2001
-From: Andreas Schwab <schwab@suse.de>
-Date: Sat, 23 Apr 2022 15:48:42 +0200
-Subject: PATCH riscv/linux: Don't add -latomic with -pthread
-
-Now that we have support for inline subword atomic operations, it is no
-longer necessary to link against libatomic.  This also fixes testsuite
-failures because the framework does not properly set up the linker flags
-for finding libatomic.
-The use of atomic operations is also independent of the use of libpthread.
-
-gcc/
-	* config/riscv/linux.h (LIB_SPEC): Don't redefine.
----
- gcc/config/riscv/linux.h | 10 ----------
- 1 file changed, 10 deletions(-)
-
-diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
-index b9557a75dc7..2fdfd930cf2 100644
---- a/gcc/config/riscv/linux.h
-+++ b/gcc/config/riscv/linux.h
-@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3.  If not see
- #undef MUSL_DYNAMIC_LINKER
- #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1"
- 
--/* Because RISC-V only has word-sized atomics, it requries libatomic where
--   others do not.  So link libatomic by default, as needed.  */
--#undef LIB_SPEC
--#ifdef LD_AS_NEEDED_OPTION
--#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
--  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
--#else
--#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
--#endif
--
- #define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
- 
- #define CPP_SPEC "%{pthread:-D_REENTRANT}"
--- 
-2.39.2
-

_service:tar_scm:0010-Version-Clear-DATESTAMP_s.patch Added

_service:tar_scm:0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch Added

@@ -0,0 +1,124 @@
+From 355eb8e20327242442d139fb052d3a3befde3dd7 Mon Sep 17 00:00:00 2001
+From: "Cui,Lili" <lili.cui@intel.com>
+Date: Tue, 1 Nov 2022 09:16:49 +0800
+Subject: PATCH 11/22 Add attribute hot judgement for INLINE_HINT_known_hot
+ hint.
+
+We set up the INLINE_HINT_known_hot hint only when we have profile feedback.
+Now add a function attribute check for it: when both caller and callee
+have __attribute__((hot)), we will also set up the INLINE_HINT_known_hot
+hint for the edge.
+
+With this patch applied,
+ADL Multi-copy:    538.imagic_r  16.7%
+ICX Multi-copy:    538.imagic_r  15.2%
+CLX Multi-copy:    538.imagic_r  12.7%
+Znver3 Multi-copy: 538.imagic_r  10.6%
+Arm Multi-copy:    538.imagic_r  13.4%
+
+gcc/ChangeLog
+
+	* ipa-inline-analysis.cc (do_estimate_edge_time): Add function attribute
+	judgement for INLINE_HINT_known_hot hint.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/ipa/inlinehint-6.c: New test.
+---
+ gcc/ipa-inline-analysis.cc              | 13 ++++---
+ gcc/testsuite/gcc.dg/ipa/inlinehint-6.c | 47 +++++++++++++++++++++++++
+ 2 files changed, 56 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-6.c
+
+diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc
+index 11d8d09ee..16ac24cfc 100644
+--- a/gcc/ipa-inline-analysis.cc
++++ b/gcc/ipa-inline-analysis.cc
+@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "ipa-utils.h"
+ #include "cfgexpand.h"
+ #include "gimplify.h"
++#include "attribs.h"
+ 
+ /* Cached node/edge growths.  */
+ fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL;
+@@ -249,15 +250,19 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time)
+       hints = estimates.hints;
+     }
+ 
+-  /* When we have profile feedback, we can quite safely identify hot
+-     edges and for those we disable size limits.  Don't do that when
+-     probability that caller will call the callee is low however, since it
++  /* When we have profile feedback or function attribute, we can quite safely
++     identify hot edges and for those we disable size limits.  Don't do that
++     when probability that caller will call the callee is low however, since it
+      may hurt optimization of the caller's hot path.  */
+-  if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p ()
++  if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p ()
+       && (edge->count.ipa ().apply_scale (2, 1)
+ 	  > (edge->caller->inlined_to
+ 	     ? edge->caller->inlined_to->count.ipa ()
+ 	     : edge->caller->count.ipa ())))
++      || (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl))
++	  != NULL
++	 && lookup_attribute ("hot", DECL_ATTRIBUTES (edge->callee->decl))
++	  != NULL))
+     hints |= INLINE_HINT_known_hot;
+ 
+   gcc_checking_assert (size >= 0);
+diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c
+new file mode 100644
+index 000000000..1f3be641c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c
+@@ -0,0 +1,47 @@
++/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp"  } */
++/* { dg-add-options bind_pic_locally } */
++
++#define size_t long long int
++
++struct A
++{
++  size_t f1, f2, f3, f4;
++};
++struct C
++{
++  struct A a;
++  size_t b;
++};
++struct C x;
++
++__attribute__((hot)) struct C callee (struct A *a, struct C *c)
++{
++  c->a=(*a);
++
++  if((c->b + 7) & 17)
++   {
++      c->a.f1 = c->a.f2 + c->a.f1;
++      c->a.f2 = c->a.f3 - c->a.f2;
++      c->a.f3 = c->a.f2 + c->a.f3;
++      c->a.f4 = c->a.f2 - c->a.f4;
++      c->b = c->a.f2;
++
++    }
++  return *c;
++}
++
++__attribute__((hot)) struct C caller (size_t d, size_t e, size_t f, size_t g, struct C *c)
++{
++  struct A a;
++  a.f1 = 1 + d;
++  a.f2 = e;
++  a.f3 = 12 + f;
++  a.f4 = 68 + g;
++  if (c->b > 0)
++    return callee (&a, c);
++  else
++    return *c;
++}
++
++/* { dg-final { scan-ipa-dump "known_hot"  "inline"  } } */
++
+-- 
+2.33.0
+

_service:tar_scm:0012-Enable-small-loop-unrolling-for-O2.patch Added

@@ -0,0 +1,490 @@
+From 1070bc24f53e851cae55320e26715cc594efcd2f Mon Sep 17 00:00:00 2001
+From: Hongyu Wang <hongyu.wang@intel.com>
+Date: Thu, 8 Sep 2022 16:52:02 +0800
+Subject: PATCH 12/22 Enable small loop unrolling for O2
+
+Modern processors have multiple-way instruction decoders.
+For x86, icelake/zen3 has 5 uops, so for a small loop with <= 4
+instructions (usually 3 uops, with a cmp/jmp pair that can be
+macro-fused), the decoder would have a 2-uop bubble in each iteration
+and the pipeline could not be fully utilized.
+
+Therefore, this patch enables loop unrolling for small size loop at O2
+to fill the decoder as much as possible. It turns on rtl loop
+unrolling when targetm.loop_unroll_adjust exists and O2 plus speed only.
+In x86 backend the default behavior is to unroll small loops with less
+than 4 insns by 1 time.
+
+This improves 548.exchange2 by 9% on icelake and 7.4% on zen3 with
+0.9% codesize increment. For other benchmarks the variants are minor
+and overall codesize increased by 0.2%.
+
+The kernel image size increased by 0.06%, and no impact on eembc.
+
+gcc/ChangeLog:
+
+	* common/config/i386/i386-common.cc (ix86_optimization_table):
+	Enable small loop unroll at O2 by default.
+	* config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll
+	factor if -munroll-only-small-loops enabled and -funroll-loops/
+	-funroll-all-loops are disabled.
+	* config/i386/i386.h (struct processor_costs): Add 2 field
+	small_unroll_ninsns and small_unroll_factor.
+	* config/i386/i386.opt: Add -munroll-only-small-loops.
+	* doc/invoke.texi: Document -munroll-only-small-loops.
+	* loop-init.cc (pass_rtl_unroll_loops::gate): Enable rtl
+	loop unrolling for -O2-speed and above if target hook
+	loop_unroll_adjust exists.
+	(pass_rtl_unroll_loops::execute): Set UAP_UNROLL flag
+	when target hook loop_unroll_adjust exists.
+	* config/i386/x86-tune-costs.h: Update all processor costs
+	with small_unroll_ninsns = 4 and small_unroll_factor = 2.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/guality/loop-1.c: Add additional option
+	-mno-unroll-only-small-loops.
+	* gcc.target/i386/pr86270.c: Add -mno-unroll-only-small-loops.
+	* gcc.target/i386/pr93002.c: Likewise.
+---
+ gcc/common/config/i386/i386-common.cc   |  1 +
+ gcc/config/i386/i386.cc                 | 18 ++++++++
+ gcc/config/i386/i386.h                  |  5 +++
+ gcc/config/i386/i386.opt                |  4 ++
+ gcc/config/i386/x86-tune-costs.h        | 58 +++++++++++++++++++++++++
+ gcc/doc/invoke.texi                     | 11 ++++-
+ gcc/loop-init.cc                        | 10 +++--
+ gcc/testsuite/gcc.dg/guality/loop-1.c   |  2 +
+ gcc/testsuite/gcc.target/i386/pr86270.c |  2 +-
+ gcc/testsuite/gcc.target/i386/pr93002.c |  2 +-
+ 10 files changed, 107 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index e2594cae4..cdd5caa55 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1687,6 +1687,7 @@ static const struct default_options ix86_option_optimization_table =
+     /* The STC algorithm produces the smallest code at -Os, for x86.  */
+     { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL,
+       REORDER_BLOCKS_ALGORITHM_STC },
++    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
+     /* Turn off -fschedule-insns by default.  It tends to make the
+        problem with not enough registers even worse.  */
+     { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index 9a9ff3b34..e56004300 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -23570,6 +23570,24 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
+   unsigned i;
+   unsigned mem_count = 0;
+ 
++  /* Unroll small size loop when unroll factor is not explicitly
++     specified.  */
++  if (!(flag_unroll_loops
++	|| flag_unroll_all_loops
++	|| loop->unroll))
++    {
++      nunroll = 1;
++
++      /* Any explicit -f{no-}unroll-{all-}loops turns off
++	 -munroll-only-small-loops.  */
++      if (ix86_unroll_only_small_loops
++	  && !OPTION_SET_P (flag_unroll_loops)
++	  && loop->ninsns <= ix86_cost->small_unroll_ninsns)
++	nunroll = ix86_cost->small_unroll_factor;
++
++      return nunroll;
++    }
++
+   if (!TARGET_ADJUST_UNROLL)
+      return nunroll;
+ 
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index fce0b3564..688aaabd3 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -219,6 +219,11 @@ struct processor_costs {
+   const char *const align_jump;		/* Jump alignment.  */
+   const char *const align_label;	/* Label alignment.  */
+   const char *const align_func;		/* Function alignment.  */
++
++  const unsigned small_unroll_ninsns;	/* Insn count limit for small loop
++					   to be unrolled.  */
++  const unsigned small_unroll_factor;   /* Unroll factor for small loop to
++					   be unrolled.  */
+ };
+ 
+ extern const struct processor_costs *ix86_cost;
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index a3675e515..fc1b944ac 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1214,3 +1214,7 @@ Do not use GOT to access external symbols.
+ -param=x86-stlf-window-ninsns=
+ Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param
+ Instructions number above which STFL stall penalty can be compensated.
++
++munroll-only-small-loops
++Target Var(ix86_unroll_only_small_loops) Init(0) Save
++Enable conservative small loop unrolling.
+diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
+index f105d57ca..db4c2da34 100644
+--- a/gcc/config/i386/x86-tune-costs.h
++++ b/gcc/config/i386/x86-tune-costs.h
+@@ -135,6 +135,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+   NULL,					/* Jump alignment.  */
+   NULL,					/* Label alignment.  */
+   NULL,					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ /* Processor costs (relative to an add) */
+@@ -244,6 +246,8 @@ struct processor_costs i386_cost = {	/* 386 specific costs */
+   "4",					/* Jump alignment.  */
+   NULL,					/* Label alignment.  */
+   "4",					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ static stringop_algs i486_memcpy2 = {
+@@ -354,6 +358,8 @@ struct processor_costs i486_cost = {	/* 486 specific costs */
+   "16",					/* Jump alignment.  */
+   "0:0:8",				/* Label alignment.  */
+   "16",					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ static stringop_algs pentium_memcpy2 = {
+@@ -462,6 +468,8 @@ struct processor_costs pentium_cost = {
+   "16:8:8",				/* Jump alignment.  */
+   "0:0:8",				/* Label alignment.  */
+   "16",					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ static const
+@@ -563,6 +571,8 @@ struct processor_costs lakemont_cost = {
+   "16:8:8",				/* Jump alignment.  */
+   "0:0:8",				/* Label alignment.  */
+   "16",					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
+@@ -679,6 +689,8 @@ struct processor_costs pentiumpro_cost = {
+   "16:11:8",				/* Jump alignment.  */
+   "0:0:8",				/* Label alignment.  */
+   "16",					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ static stringop_algs geode_memcpy2 = {
+@@ -786,6 +798,8 @@ struct processor_costs geode_cost = {
+   NULL,					/* Jump alignment.  */
+   NULL,					/* Label alignment.  */
+   NULL,					/* Func alignment.  */
++  4,					/* Small unroll limit.  */
++  2,					/* Small unroll factor.  */
+ };
+ 
+ static stringop_algs k6_memcpy2 = {
+@@ -896,6 +910,8 @@ struct processor_costs k6_cost = {
+   "32:8:8",				/* Jump alignment.  */

_service:tar_scm:0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch Added

@@ -0,0 +1,230 @@
+From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001
+From: Hongyu Wang <hongyu.wang@intel.com>
+Date: Sat, 19 Nov 2022 09:38:00 +0800
+Subject: [PATCH 13/22] i386: Only enable small loop unrolling in backend [PR
+ 107692]
+
+Following the discussion in PR107692, -munroll-only-small-loops
+does not turn -funroll-loops on or off, and the current check in
+pass_rtl_unroll_loops::gate would cause -fno-unroll-loops to have no
+effect.  Revert the change to targetm.loop_unroll_adjust and apply
+the backend option change to strictly follow the rule that
+-funroll-loops takes full control of loop unrolling, and
+-munroll-only-small-loops just changes its behavior to unroll only
+small loops.
+
+gcc/ChangeLog:
+
+	PR target/107692
+	* common/config/i386/i386-common.cc (ix86_optimization_table):
+	Enable loop unroll O2, disable -fweb and -frename-registers
+	by default.
+	* config/i386/i386-options.cc
+	(ix86_override_options_after_change):
+	Disable small loop unroll when funroll-loops enabled, reset
+	cunroll_grow_size when it is not explicitly enabled.
+	(ix86_option_override_internal): Call
+	ix86_override_options_after_change instead of calling
+	ix86_recompute_optlev_based_flags and ix86_default_align
+	separately.
+	* config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll
+	factor if -munroll-only-small-loops enabled.
+	* loop-init.cc (pass_rtl_unroll_loops::gate): Do not enable
+	loop unrolling for -O2-speed.
+	(pass_rtl_unroll_loops::execute): Remove
+	targetm.loop_unroll_adjust check.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/107692
+	* gcc.dg/guality/loop-1.c: Remove additional option for ia32.
+	* gcc.target/i386/pr86270.c: Add -fno-unroll-loops.
+	* gcc.target/i386/pr93002.c: Likewise.
+---
+ gcc/common/config/i386/i386-common.cc   |  8 ++++++
+ gcc/config/i386/i386-options.cc         | 34 ++++++++++++++++++++++---
+ gcc/config/i386/i386.cc                 | 18 ++++---------
+ gcc/loop-init.cc                        | 10 +++-----
+ gcc/testsuite/gcc.dg/guality/loop-1.c   |  2 --
+ gcc/testsuite/gcc.target/i386/pr86270.c |  2 +-
+ gcc/testsuite/gcc.target/i386/pr93002.c |  2 +-
+ 7 files changed, 48 insertions(+), 28 deletions(-)
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index cdd5caa55..f650e255f 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1687,7 +1687,15 @@ static const struct default_options ix86_option_optimization_table =
+     /* The STC algorithm produces the smallest code at -Os, for x86.  */
+     { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL,
+       REORDER_BLOCKS_ALGORITHM_STC },
++
++    /* Turn on -funroll-loops with -munroll-only-small-loops to enable small
++       loop unrolling at -O2.  */
++    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
++    /* Turns off -frename-registers and -fweb which are enabled by
++       funroll-loops.  */
++    { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
++    { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
+     /* Turn off -fschedule-insns by default.  It tends to make the
+        problem with not enough registers even worse.  */
+     { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 099cec4b6..ff44ad4e0 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -1816,8 +1816,37 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
+ void
+ ix86_override_options_after_change (void)
+ {
++  /* Default align_* from the processor table.  */
+   ix86_default_align (&global_options);
++
+   ix86_recompute_optlev_based_flags (&global_options, &global_options_set);
++
++  /* Disable unrolling small loops when there's explicit
++     -f{,no-}unroll-loops.  */
++  if ((OPTION_SET_P (flag_unroll_loops))
++     || (OPTION_SET_P (flag_unroll_all_loops)
++	 && flag_unroll_all_loops))
++    {
++      if (!OPTION_SET_P (ix86_unroll_only_small_loops))
++	ix86_unroll_only_small_loops = 0;
++      /* Re-enable -frename-registers and -fweb if funroll-loops
++	 enabled.  */
++      if (!OPTION_SET_P (flag_web))
++	flag_web = flag_unroll_loops;
++      if (!OPTION_SET_P (flag_rename_registers))
++	flag_rename_registers = flag_unroll_loops;
++      /* -fcunroll-grow-size default follows -fno-unroll-loops.  */
++      if (!OPTION_SET_P (flag_cunroll_grow_size))
++	flag_cunroll_grow_size = flag_unroll_loops
++				 || flag_peel_loops
++				 || optimize >= 3;
++    }
++  else
++    {
++      if (!OPTION_SET_P (flag_cunroll_grow_size))
++	flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
++    }
++
+ }
+ 
+ /* Clear stack slot assignments remembered from previous functions.
+@@ -2329,7 +2358,7 @@ ix86_option_override_internal (bool main_args_p,
+ 
+   set_ix86_tune_features (opts, ix86_tune, opts->x_ix86_dump_tunes);
+ 
+-  ix86_recompute_optlev_based_flags (opts, opts_set);
++  ix86_override_options_after_change ();
+ 
+   ix86_tune_cost = processor_cost_table[ix86_tune];
+   /* TODO: ix86_cost should be chosen at instruction or function granuality
+@@ -2360,9 +2389,6 @@ ix86_option_override_internal (bool main_args_p,
+       || TARGET_64BIT_P (opts->x_ix86_isa_flags))
+     opts->x_ix86_regparm = REGPARM_MAX;
+ 
+-  /* Default align_* from the processor table.  */
+-  ix86_default_align (opts);
+-
+   /* Provide default for -mbranch-cost= value.  */
+   SET_OPTION_IF_UNSET (opts, opts_set, ix86_branch_cost,
+ 		       ix86_tune_cost->branch_cost);
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index e56004300..462dce10e 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -23572,20 +23572,12 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
+ 
+   /* Unroll small size loop when unroll factor is not explicitly
+      specified.  */
+-  if (!(flag_unroll_loops
+-	|| flag_unroll_all_loops
+-	|| loop->unroll))
++  if (ix86_unroll_only_small_loops && !loop->unroll)
+     {
+-      nunroll = 1;
+-
+-      /* Any explicit -f{no-}unroll-{all-}loops turns off
+-	 -munroll-only-small-loops.  */
+-      if (ix86_unroll_only_small_loops
+-	  && !OPTION_SET_P (flag_unroll_loops)
+-	  && loop->ninsns <= ix86_cost->small_unroll_ninsns)
+-	nunroll = ix86_cost->small_unroll_factor;
+-
+-      return nunroll;
++      if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
++	return MIN (nunroll, ix86_cost->small_unroll_factor);
++      else
++	return 1;
+     }
+ 
+   if (!TARGET_ADJUST_UNROLL)
+diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc
+index f1c717041..1e4f6cfd7 100644
+--- a/gcc/loop-init.cc
++++ b/gcc/loop-init.cc
+@@ -565,12 +565,9 @@ public:
+   {}
+ 
+   /* opt_pass methods: */
+-  virtual bool gate (function *fun)
++  virtual bool gate (function *)
+     {
+-      return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll
+-	      || (targetm.loop_unroll_adjust
+-		  && optimize >= 2
+-		  && optimize_function_for_speed_p (fun)));
++      return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll);
+     }
+ 
+   virtual unsigned int execute (function *);
+@@ -586,8 +583,7 @@ pass_rtl_unroll_loops::execute (function *fun)
+       if (dump_file)
+ 	df_dump (dump_file);
+ 
+-      if (flag_unroll_loops
+-	  || targetm.loop_unroll_adjust)
++      if (flag_unroll_loops)
+ 	flags |= UAP_UNROLL;
+       if (flag_unroll_all_loops)
+ 	flags |= UAP_UNROLL_ALL;
+diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c
+index a32ea445a..1b1f6d322 100644
+--- a/gcc/testsuite/gcc.dg/guality/loop-1.c
++++ b/gcc/testsuite/gcc.dg/guality/loop-1.c
+@@ -1,7 +1,5 @@
+ /* { dg-do run } */
+ /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */

_service:tar_scm:0014-Array-widen-compare-Add-a-new-optimization-for-array.patch Added

@@ -0,0 +1,1981 @@
+From 5ef5f6c4ae806f56ff81450c759f36d59b5b23db Mon Sep 17 00:00:00 2001
+From: dingguangya <dingguangya1@huawei.com>
+Date: Sat, 29 Jul 2023 17:45:01 +0800
+Subject: [PATCH 14/22] [Array-widen-compare] Add a new optimization for array
+ comparison scenarios
+
+Add option farray-widen-compare.
+For an array pointer whose element is a single-byte type,
+by changing the pointer type to a long-byte type, the elements
+can be combined and compared after loading.
+---
+ gcc/Makefile.in                               |    1 +
+ gcc/common.opt                                |    5 +
+ gcc/doc/invoke.texi                           |   13 +-
+ gcc/passes.def                                |    1 +
+ .../gcc.dg/tree-ssa/awiden-compare-1.c        |   19 +
+ .../gcc.dg/tree-ssa/awiden-compare-2.c        |   90 +
+ .../gcc.dg/tree-ssa/awiden-compare-3.c        |   22 +
+ .../gcc.dg/tree-ssa/awiden-compare-4.c        |   22 +
+ .../gcc.dg/tree-ssa/awiden-compare-5.c        |   19 +
+ .../gcc.dg/tree-ssa/awiden-compare-6.c        |   19 +
+ .../gcc.dg/tree-ssa/awiden-compare-7.c        |   22 +
+ .../gcc.dg/tree-ssa/awiden-compare-8.c        |   24 +
+ gcc/timevar.def                               |    1 +
+ gcc/tree-pass.h                               |    1 +
+ gcc/tree-ssa-loop-array-widen-compare.cc      | 1555 +++++++++++++++++
+ 15 files changed, 1813 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
+ create mode 100644 gcc/tree-ssa-loop-array-widen-compare.cc
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 31ff95500..0aabc6ea3 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1653,6 +1653,7 @@ OBJS = \
+ 	tree-ssa-loop-ivopts.o \
+ 	tree-ssa-loop-manip.o \
+ 	tree-ssa-loop-niter.o \
++	tree-ssa-loop-array-widen-compare.o \
+ 	tree-ssa-loop-prefetch.o \
+ 	tree-ssa-loop-split.o \
+ 	tree-ssa-loop-unswitch.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index e365a48bc..4d91ce8cf 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables
+ Common Var(flag_asynchronous_unwind_tables) Optimization
+ Generate unwind tables that are exact at each instruction boundary.
+ 
++farray-widen-compare
++Common Var(flag_array_widen_compare) Optimization
++Extends types for pointers to arrays to improve array comparison performance.
++In some extreme situations this may result in unsafe behavior.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index ff8cd032f..a11e2c24b 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}.
+ -falign-loops=@var{n}:@var{m}:@var{n2}:@var{m2} @gol
+ -fno-allocation-dce -fallow-store-data-races @gol
+ -fassociative-math  -fauto-profile  -fauto-profile=@var{path} @gol
+--fauto-inc-dec  -fbranch-probabilities @gol
++-farray-widen-compare -fauto-inc-dec  -fbranch-probabilities @gol
+ -fcaller-saves @gol
+ -fcombine-stack-adjustments  -fconserve-stack @gol
+ -fcompare-elim  -fcprop-registers  -fcrossjumping @gol
+@@ -11387,6 +11387,17 @@ This pass is always skipped on architectures that do not have
+ instructions to support this.  Enabled by default at @option{-O1} and
+ higher on architectures that support this.
+ 
++@item -farray-widen-compare
++@opindex farray-widen-compare
++In the narrow-byte array comparison scenario, the types of pointers
++pointing to array are extended so that elements of multiple bytes can
++be loaded at a time when a wide type is used to dereference an array,
++thereby improving the performance of this comparison scenario.  In some
++extreme situations this may result in unsafe behavior.
++
++This option may generate better or worse code; results are highly dependent
++on the structure of loops within the source code.
++
+ @item -fdce
+ @opindex fdce
+ Perform dead code elimination (DCE) on RTL@.
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 375d3d62d..8dbb7983e 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.  If not see
+           NEXT_PASS (pass_dse);
+ 	  NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
+ 	  NEXT_PASS (pass_phiopt, true /* early_p */);
++	  NEXT_PASS (pass_array_widen_compare);
+ 	  NEXT_PASS (pass_tail_recursion);
+ 	  NEXT_PASS (pass_if_to_switch);
+ 	  NEXT_PASS (pass_convert_switch);
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
+new file mode 100644
+index 000000000..e18ef5ec1
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
++
++#include <stdint.h>
++#include <stdio.h>
++
++#define my_min(x, y) ((x) < (y) ? (x) : (y))
++
++uint32_t
++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
++{
++  uint32_t len = my_min(len0, len1);
++  while (++len != len_limit)
++    if (pb[len] != cur[len])
++      break;
++  return len;
++}
++
++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
+new file mode 100644
+index 000000000..f4b20b43c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
+@@ -0,0 +1,90 @@
++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
++
++#include <stdint.h>
++#include <stdio.h>
++
++#define EMPTY_HASH_VALUE 0
++#define my_min(x, y) ((x) < (y) ? (x) : (y))
++#define true 1
++
++typedef struct {
++  uint32_t len;
++  uint32_t dist;
++} lzma_match;
++
++
++lzma_match *
++func (
++  const uint32_t len_limit,
++  const uint32_t pos,
++  const uint8_t *const cur,
++  uint32_t cur_match,
++  uint32_t depth,
++  uint32_t *const son,
++  const uint32_t cyclic_pos,
++  const uint32_t cyclic_size,
++  lzma_match *matches,
++  uint32_t len_best)
++{
++  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
++  uint32_t *ptr1 = son + (cyclic_pos << 1);
++
++  uint32_t len0 = 0;
++  uint32_t len1 = 0;
++
++  while (true)
++    {
++      const uint32_t delta = pos - cur_match;
++      if (depth-- == 0 || delta >= cyclic_size)
++        {
++          *ptr0 = EMPTY_HASH_VALUE;
++          *ptr1 = EMPTY_HASH_VALUE;
++          return matches;
++        }
++
++      uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) << 1);
++
++      const uint8_t *const pb = cur -delta;
++      uint32_t len = my_min(len0, len1);
++
++      if (pb[len] == cur[len])
++        {
++          while (++len != len_limit)
++            if (pb[len] != cur[len])
++              break;
++
++          if (len_best < len)
++            {
++              len_best = len;
++              matches->len = len;

_service:tar_scm:0015-Backport-Structure-reorganization-optimization.patch Added

@@ -0,0 +1,6170 @@
+From 8631d4a39453bb262675bea9abb5c1b7d52af624 Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Wed, 19 Jul 2023 10:28:04 +0800
+Subject: [PATCH 15/22] [Backport] Structure reorganization optimization
+
+Reference: https://gcc.gnu.org/git/?p=gcc-old.git;a=commit;h=6e1bd1c900533c627b5e4fbbecb41dcd7974b522
+
+Introduce structure reorganization optimization, which changes the layout
+of C-like structures in order to better utilize spatial locality.  This
+transformation is effective for programs containing arrays of structures.
+---
+ gcc/Makefile.in                               |    1 +
+ gcc/common.opt                                |    4 +-
+ gcc/configure                                 |    2 +-
+ gcc/configure.ac                              |    2 +-
+ gcc/doc/invoke.texi                           |   23 +
+ gcc/gimple-ssa-warn-access.cc                 |    8 +
+ gcc/ipa-param-manipulation.cc                 |    3 +-
+ gcc/ipa-param-manipulation.h                  |    3 +-
+ gcc/ipa-struct-reorg/escapes.def              |   60 +
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 4015 +++++++++++++++++
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |  235 +
+ gcc/params.opt                                |    4 +
+ gcc/passes.def                                |    2 +
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   35 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-1.c  |   24 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-2.c  |   29 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-3.c  |   23 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-4.c  |   59 +
+ .../gcc.dg/struct/w_prof_global_array.c       |   29 +
+ .../gcc.dg/struct/w_prof_global_var.c         |   42 +
+ .../gcc.dg/struct/w_prof_local_array.c        |   37 +
+ .../gcc.dg/struct/w_prof_local_var.c          |   40 +
+ .../gcc.dg/struct/w_prof_single_str_global.c  |   31 +
+ gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c |   64 +
+ .../gcc.dg/struct/w_ratio_cold_str.c          |   43 +
+ .../gcc.dg/struct/wo_prof_array_field.c       |   26 +
+ .../struct/wo_prof_array_through_pointer.c    |   38 +
+ .../gcc.dg/struct/wo_prof_double_malloc.c     |   29 +
+ .../gcc.dg/struct/wo_prof_empty_str.c         |   44 +
+ .../struct/wo_prof_escape_arg_to_local.c      |   44 +
+ .../gcc.dg/struct/wo_prof_escape_return-1.c   |   33 +
+ .../gcc.dg/struct/wo_prof_escape_return.c     |   32 +
+ .../gcc.dg/struct/wo_prof_escape_str_init.c   |   31 +
+ .../struct/wo_prof_escape_substr_array.c      |   33 +
+ .../struct/wo_prof_escape_substr_pointer.c    |   48 +
+ .../struct/wo_prof_escape_substr_value.c      |   45 +
+ .../gcc.dg/struct/wo_prof_global_array.c      |   32 +
+ .../gcc.dg/struct/wo_prof_global_var.c        |   45 +
+ .../gcc.dg/struct/wo_prof_local_array.c       |   40 +
+ .../gcc.dg/struct/wo_prof_local_var.c         |   43 +
+ .../gcc.dg/struct/wo_prof_malloc_size_var-1.c |   47 +
+ .../gcc.dg/struct/wo_prof_malloc_size_var.c   |   47 +
+ .../struct/wo_prof_mult_field_peeling.c       |   42 +
+ .../gcc.dg/struct/wo_prof_single_str_global.c |   34 +
+ .../gcc.dg/struct/wo_prof_single_str_local.c  |   34 +
+ .../struct/wo_prof_single_str_pointer.c       |   38 +
+ .../gcc.dg/struct/wo_prof_two_strs.c          |   67 +
+ gcc/timevar.def                               |    1 +
+ gcc/tree-pass.h                               |    1 +
+ 49 files changed, 5686 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/ipa-struct-reorg/escapes.def
+ create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+ create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.h
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 31ff95500..c863ad992 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1451,6 +1451,7 @@ OBJS = \
+ 	incpath.o \
+ 	init-regs.o \
+ 	internal-fn.o \
++	ipa-struct-reorg/ipa-struct-reorg.o \
+ 	ipa-cp.o \
+ 	ipa-sra.o \
+ 	ipa-devirt.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index e365a48bc..b48fa3228 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1950,8 +1950,8 @@ Common Ignore
+ Does nothing. Preserved for backward compatibility.
+ 
+ fipa-struct-reorg
+-Common Ignore
+-Does nothing. Preserved for backward compatibility.
++Common Var(flag_ipa_struct_reorg) Init(0) Optimization
++Perform structure layout optimizations.
+ 
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+diff --git a/gcc/configure b/gcc/configure
+index c749ace01..98bbf0f85 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -34191,7 +34191,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
+     "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;;
+     "gccdepdir":C)
+   ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR
+-  for lang in $subdirs c-family common analyzer rtl-ssa
++  for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg
+   do
+       ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR
+   done ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index 992a50e7b..c74f4b555 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -1340,7 +1340,7 @@ AC_CHECK_HEADERS(ext/hash_map)
+ ZW_CREATE_DEPDIR
+ AC_CONFIG_COMMANDS(gccdepdir,
+   ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR
+-  for lang in $subdirs c-family common analyzer rtl-ssa
++  for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg
+   do
+       ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR
+   done, subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR)
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index ff8cd032f..e37bae5b1 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}.
+ -finline-functions  -finline-functions-called-once  -finline-limit=@var{n} @gol
+ -finline-small-functions -fipa-modref -fipa-cp  -fipa-cp-clone @gol
+ -fipa-bit-cp  -fipa-vrp  -fipa-pta  -fipa-profile  -fipa-pure-const @gol
++-fipa-struct-reorg @gol
+ -fipa-reference  -fipa-reference-addressable @gol
+ -fipa-stack-alignment  -fipa-icf  -fira-algorithm=@var{algorithm} @gol
+ -flive-patching=@var{level} @gol
+@@ -11886,6 +11887,19 @@ higher.
+ Discover which functions are pure or constant.
+ Enabled by default at @option{-O1} and higher.
+ 
++@item -fipa-struct-reorg
++@opindex fipa-struct-reorg
++Perform structure reorganization optimization, which changes the layout of
++C-like structures in order to better utilize spatial locality.  This transformation
++is effective for programs containing arrays of structures.  Available in two
++compilation modes: profile-based (enabled with @option{-fprofile-generate})
++or static (which uses built-in heuristics).  It works only in whole program
++mode, so it requires @option{-fwhole-program} to be
++enabled.  Structures considered @samp{cold} by this transformation are not
++affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}).
++
++With this flag, the program debug info reflects a new structure layout.
++
+ @item -fipa-reference
+ @opindex fipa-reference
+ Discover which static variables do not escape the
+@@ -13772,6 +13786,15 @@ In each case, the @var{value} is an integer.  The following choices
+ of @var{name} are recognized for all targets:
+ 
+ @table @gcctabopt
++@item struct-reorg-cold-struct-ratio
++The threshold ratio (as a percentage) between a structure frequency
++and the frequency of the hottest structure in the program.  This parameter
++is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}.
++We say that if the ratio of a structure frequency, calculated by profiling,
++to the hottest structure frequency in the program is less than this
++parameter, then structure reorganization is not applied to this structure.
++The default is 10.
++
+ @item predictable-branch-outcome
+ When branch is predicted to be taken with probability lower than this threshold

_service:tar_scm:0016-CompleteStructRelayout-Complete-Structure-Relayout.patch Added

@@ -0,0 +1,2056 @@
+From 699caeaa2d89966e4af1d36bc96b53eb4dac0a09 Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Fri, 25 Aug 2023 09:59:39 +0800
+Subject: [PATCH 16/22] [CompleteStructRelayout] Complete Structure Relayout
+
+Introduce complete structure reorganization based on the original
+structure reorganization optimization, which changes an array of
+structures into a structure of arrays in order to better utilize
+spatial locality.
+---
+ gcc/ipa-struct-reorg/escapes.def              |   2 +
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 994 ++++++++++++++++--
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |  33 +
+ .../g++.dg/struct/no-body-function.cpp        |  18 +
+ .../g++.dg/struct/struct-reorg-1.cpp          |  13 +
+ .../g++.dg/struct/struct-reorg-2.cpp          |  17 +
+ .../g++.dg/struct/struct-reorg-3.cpp          |  24 +
+ gcc/testsuite/g++.dg/struct/struct-reorg.exp  |  26 +
+ gcc/testsuite/gcc.dg/struct/csr_1.c           |  60 ++
+ .../gcc.dg/struct/csr_allocation-1.c          |  46 +
+ .../gcc.dg/struct/csr_allocation-2.c          |  59 ++
+ .../gcc.dg/struct/csr_allocation-3.c          |  77 ++
+ gcc/testsuite/gcc.dg/struct/csr_cast_int.c    |  52 +
+ .../gcc.dg/struct/csr_separate_instance.c     |  48 +
+ .../gcc.dg/struct/sr_address_of_field.c       |  37 +
+ gcc/testsuite/gcc.dg/struct/sr_convert_mem.c  |  23 +
+ gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c  |  25 +
+ gcc/testsuite/gcc.dg/struct/sr_pointer_and.c  |  17 +
+ .../gcc.dg/struct/sr_pointer_minus.c          |  33 +
+ 19 files changed, 1539 insertions(+), 65 deletions(-)
+ create mode 100644 gcc/testsuite/g++.dg/struct/no-body-function.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg.exp
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_cast_int.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_separate_instance.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_address_of_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_convert_mem.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_and.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+
+diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def
+index c4c8e0739..d825eb3e6 100644
+--- a/gcc/ipa-struct-reorg/escapes.def
++++ b/gcc/ipa-struct-reorg/escapes.def
+@@ -56,5 +56,7 @@ DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct
+ DEF_ESCAPE (escape_array, "Type is used in an array not handled yet")
+ DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet")
+ DEF_ESCAPE (escape_return, "Type escapes via a return not handled yet")
++DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance")
++DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt")
+ 
+ #undef DEF_ESCAPE
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 238530860..c8b975a92 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -104,10 +104,12 @@ along with GCC; see the file COPYING3.  If not see
+ #include "tree-ssa-live.h"  /* For remove_unused_locals.  */
+ #include "ipa-param-manipulation.h"
+ #include "gimplify-me.h"
++#include "cfgloop.h"
+ 
+ namespace {
+ 
+ using namespace struct_reorg;
++using namespace struct_relayout;
+ 
+ #define VOID_POINTER_P(type) \
+   (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
+@@ -194,6 +196,14 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
+ 				   GSI_SAME_STMT);
+ }
+ 
++enum srmode
++{
++  NORMAL = 0,
++  COMPLETE_STRUCT_RELAYOUT
++};
++
++static bool is_result_of_mult (tree, tree *, tree);
++
+ } // anon namespace
+ 
+ 
+@@ -283,7 +293,8 @@ srtype::srtype (tree type)
+   : type (type),
+     chain_type (false),
+     escapes (does_not_escape),
+-    visited (false)
++    visited (false),
++    has_alloc_array (0)
+ {
+   for (int i = 0; i < max_split; i++)
+     newtype[i] = NULL_TREE;
+@@ -483,13 +494,6 @@ srtype::dump (FILE *f)
+       fn->simple_dump (f);
+     }
+   fprintf (f, "\n }\n");
+-  fprintf (f, "\n field_sites = {");
+-  FOR_EACH_VEC_ELT (field_sites, i, field)
+-    {
+-      fprintf (f, "  \n");
+-      field->simple_dump (f);
+-    }
+-  fprintf (f, "\n }\n");
+   fprintf (f, "}\n");
+ }
+ 
+@@ -631,15 +635,7 @@ srtype::create_new_type (void)
+ 
+   maxclusters++;
+ 
+-  const char *tname = NULL;
+-
+-  if (TYPE_NAME (type) != NULL)
+-    {
+-      if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE)
+-	tname = IDENTIFIER_POINTER (TYPE_NAME (type));
+-      else if (DECL_NAME (TYPE_NAME (type)) != NULL)
+-	tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
+-    }
++  const char *tname = get_type_name (type);
+ 
+   for (unsigned i = 0; i < maxclusters; i++)
+     {
+@@ -653,7 +649,10 @@ srtype::create_new_type (void)
+       if (tname)
+ 	{
+ 	  name = concat (tname, ".reorg.", id, NULL);
+-	  TYPE_NAME (newtype[i]) = get_identifier (name);
++	  TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION,
++					       TYPE_DECL,
++					       get_identifier (name),
++					       newtype[i]);
+ 	  free (name);
+ 	}
+     }
+@@ -673,6 +672,8 @@ srtype::create_new_type (void)
+     {
+       TYPE_FIELDS (newtypei) = newfieldsi;
+       layout_type (newtypei);
++      if (TYPE_NAME (newtypei) != NULL)
++	layout_decl (TYPE_NAME (newtypei), 0);
+     }
+ 
+   warn_padded = save_warn_padded;
+@@ -841,12 +842,6 @@ srfield::dump (FILE *f)
+   fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset);
+   fprintf (f, ", type = ");
+   print_generic_expr (f, fieldtype);
+-  if (type)
+-    {
+-      fprintf (f, "( srtype = ");
+-      type->simple_dump (f);
+-      fprintf (f, ")");
+-    }
+   fprintf (f, "\n}\n");
+ }
+ 
+@@ -855,7 +850,8 @@ srfield::dump (FILE *f)
+ void
+ srfield::simple_dump (FILE *f)
+ {
+-  fprintf (f, "field (%d)", DECL_UID (fielddecl));
++  if (fielddecl)
++    fprintf (f, "field (%d)", DECL_UID (fielddecl));
+ }
+ 
+ /* Dump out the access structure to FILE.  */
+@@ -899,6 +895,92 @@ srdecl::dump (FILE *file)
+ } // namespace struct_reorg
+ 
+ 
++namespace struct_relayout {
++
++/* Complete Structure Relayout Optimization.
++   It reorganizes all structure members, and puts same member together.
++   struct s {
++     long a;
++     int b;
++     struct s *c;
++   };
++   Array looks like
++     abcabcabcabc...
++   will be transformed to
++     aaaa...bbbb...cccc...
++*/
++
++#define GPTR_SIZE(i) \
++  TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptr[i])))
++
++unsigned transformed = 0;

_service:tar_scm:0017-StructReorg-Some-bugfix-for-structure-reorganization.patch Added

@@ -0,0 +1,489 @@
+From 2b4db34d3b21ff8597373e9e67858b3b60cc7dae Mon Sep 17 00:00:00 2001
+From: eastb233 <xiezhiheng@huawei.com>
+Date: Fri, 21 Jul 2023 11:20:51 +0800
+Subject: [PATCH 17/22] [StructReorg] Some bugfix for structure reorganization
+
+Some bugfix for structure reorganization,
+1. disable type simplify in LTO within optimizations
+2. only enable optimizations in C language
+3. use new to initialize allocated memory in symbol-summary.h
+4. cover escape scenarios not considered
+---
+ gcc/ipa-free-lang-data.cc                    |  11 ++
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc     | 101 +++++++++++--------
+ gcc/symbol-summary.h                         |  13 ++-
+ gcc/testsuite/gcc.dg/struct/struct_reorg-5.c |  31 ++++++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-6.c |  54 ++++++++++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-7.c |  38 +++++++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-8.c |  25 +++++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-9.c |  54 ++++++++++
+ 8 files changed, 283 insertions(+), 44 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
+
+diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
+index a74215685..5450be9fe 100644
+--- a/gcc/ipa-free-lang-data.cc
++++ b/gcc/ipa-free-lang-data.cc
+@@ -102,6 +102,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld)
+ static tree
+ fld_simplified_type_name (tree type)
+ {
++  /* Simplify type will cause that struct A and struct A within
++     struct B are different type pointers, so skip it in structure
++     optimizations.  */
++  if (flag_ipa_struct_reorg)
++    return TYPE_NAME (type);
++
+   if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
+     return TYPE_NAME (type);
+   /* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
+@@ -340,6 +346,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
+ {
+   if (!t)
+     return t;
++  /* Simplify type will cause that struct A and struct A within
++     struct B are different type pointers, so skip it in structure
++     optimizations.  */
++  if (flag_ipa_struct_reorg)
++    return t;
+   if (POINTER_TYPE_P (t))
+     return fld_incomplete_type_of (t, fld);
+   /* FIXME: This triggers verification error, see PR88140.  */
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index c8b975a92..9f790b28b 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -105,6 +105,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "ipa-param-manipulation.h"
+ #include "gimplify-me.h"
+ #include "cfgloop.h"
++#include "langhooks.h"
+ 
+ namespace {
+ 
+@@ -196,6 +197,39 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
+ 				   GSI_SAME_STMT);
+ }
+ 
++/* Check whether in C language or LTO with only C language.  */
++
++static bool
++lang_c_p (void)
++{
++  const char *language_string = lang_hooks.name;
++
++  if (!language_string)
++    return false;
++
++  if (strcmp (language_string, "GNU GIMPLE") == 0)
++    {
++      unsigned i = 0;
++      tree t = NULL;
++      const char *unit_string = NULL;
++
++      FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
++	{
++	  unit_string = TRANSLATION_UNIT_LANGUAGE (t);
++	  if (!unit_string
++	      || (strncmp (unit_string, "GNU C", 5) != 0)
++	      || (!ISDIGIT (unit_string[5])))
++	    return false;
++	}
++      return true;
++    }
++  else if (strncmp (language_string, "GNU C", 5) == 0
++	   && ISDIGIT (language_string[5]))
++    return true;
++
++  return false;
++}
++
+ enum srmode
+ {
+   NORMAL = 0,
+@@ -1018,7 +1052,6 @@ public:
+   void analyze_types (void);
+   void clear_visited (void);
+   bool create_new_types (void);
+-  void restore_field_type (void);
+   void create_new_decls (void);
+   srdecl *find_decl (tree);
+   void create_new_functions (void);
+@@ -2107,7 +2140,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
+ 	      srtype *t = find_type (inner_type (TREE_TYPE (rhs)));
+ 	      srdecl *d = find_decl (lhs);
+ 	      if (!d && t)
+-		current_function->record_decl (t, lhs, -1);
++		{
++		  current_function->record_decl (t, lhs, -1);
++		  tree var = SSA_NAME_VAR (lhs);
++		  if (var && VOID_POINTER_P (TREE_TYPE (var)))
++		    current_function->record_decl (t, var, -1);
++		}
+ 	    }
+ 	  if (TREE_CODE (rhs) == SSA_NAME
+ 	      && VOID_POINTER_P (TREE_TYPE (rhs))
+@@ -2116,7 +2154,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
+ 	      srtype *t = find_type (inner_type (TREE_TYPE (lhs)));
+ 	      srdecl *d = find_decl (rhs);
+ 	      if (!d && t)
+-		current_function->record_decl (t, rhs, -1);
++		{
++		  current_function->record_decl (t, rhs, -1);
++		  tree var = SSA_NAME_VAR (rhs);
++		  if (var && VOID_POINTER_P (TREE_TYPE (var)))
++		    current_function->record_decl (t, var, -1);
++		}
+ 	    }
+ 	}
+       else
+@@ -2796,8 +2839,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
+   if (escapes != does_not_escape)
+     {
+       for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
+-	mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
+-			     escapes);
++	{
++	  mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
++			       escapes);
++	  srdecl *d = current_function->find_decl (
++					  gimple_call_arg (stmt, i));
++	  if (d)
++	    d->type->mark_escape (escapes, stmt);
++	}
+       return;
+     }
+ 
+@@ -3731,42 +3780,6 @@ ipa_struct_reorg::analyze_types (void)
+     }
+ }
+ 
+-/* When struct A has a struct B member, B's type info
+-   is not stored in
+-     TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA)))
+-   Try to restore B's type information.  */
+-
+-void
+-ipa_struct_reorg::restore_field_type (void)
+-{
+-  for (unsigned i = 0; i < types.length (); i++)
+-    {
+-      for (unsigned j = 0; j < types[i]->fields.length (); j++)
+-	{
+-	  srfield *field = types[i]->fields[j];
+-	  if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE)
+-	    {
+-	      /* If field type has TYPE_FIELDS information,
+-		 we do not need to do this.  */
+-	      if (TYPE_FIELDS (field->type->type) != NULL)
+-		continue;
+-	      for (unsigned k = 0; k < types.length (); k++)
+-		{
+-		  if (i == k)
+-		    continue;
+-		  const char *type1 = get_type_name (field->type->type);
+-		  const char *type2 = get_type_name (types[k]->type);
+-		  if (type1 == NULL || type2 == NULL)
+-		    continue;
+-		  if (type1 == type2
+-		      && TYPE_FIELDS (types[k]->type))
+-		    field->type = types[k];
+-		}
+-	    }
+-	}
+-    }
+-}

_service:tar_scm:0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch Added

@@ -0,0 +1,342 @@
+From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001
+From: dingguangya <dingguangya1@huawei.com>
+Date: Sat, 29 Jul 2023 18:27:10 +0800
+Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp
+ instruction
+
+Add flag -fccmp2.
+Enables the use of the ccmp instruction by creating a new conflict
+relationship for instances where temporary expressions replacement
+cannot be effectively created.
+---
+ gcc/ccmp.cc                               |  33 ++++
+ gcc/ccmp.h                                |   1 +
+ gcc/common.opt                            |   4 +
+ gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  15 ++
+ gcc/tree-ssa-coalesce.cc                  | 197 ++++++++++++++++++++++
+ 5 files changed, 250 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+
+diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
+index 3db0a264e..e34f3bcc6 100644
+--- a/gcc/ccmp.cc
++++ b/gcc/ccmp.cc
+@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "cfgexpand.h"
+ #include "ccmp.h"
+ #include "predict.h"
++#include "gimple-iterator.h"
+ 
+ /* Check whether T is a simple boolean variable or a SSA name
+    set by a comparison operator in the same basic block.  */
+@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
+   return false;
+ }
+ 
++/* Check whether bb is a potential conditional compare candidate.  */
++bool
++check_ccmp_candidate (basic_block bb)
++{
++  gimple_stmt_iterator gsi;
++  gimple *bb_last_stmt, *stmt;
++  tree op0, op1;
++
++  gsi = gsi_last_bb (bb);
++  bb_last_stmt = gsi_stmt (gsi);
++
++  if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
++    {
++      op0 = gimple_cond_lhs (bb_last_stmt);
++      op1 = gimple_cond_rhs (bb_last_stmt);
++
++      if (TREE_CODE (op0) == SSA_NAME
++	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
++	  && TREE_CODE (op1) == INTEGER_CST
++	  && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
++	      || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
++	{
++	  stmt = SSA_NAME_DEF_STMT (op0);
++	  if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
++	    {
++	      return ccmp_candidate_p (stmt);
++	    }
++	}
++    }
++  return false;
++}
++
+ /* Extract the comparison we want to do from the tree.  */
+ void
+ get_compare_parts (tree t, int *up, rtx_code *rcode,
+diff --git a/gcc/ccmp.h b/gcc/ccmp.h
+index 1799d5fed..efe3a1c14 100644
+--- a/gcc/ccmp.h
++++ b/gcc/ccmp.h
+@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3.  If not see
+ #define GCC_CCMP_H
+ 
+ extern rtx expand_ccmp_expr (gimple *, machine_mode);
++extern bool check_ccmp_candidate (basic_block bb);
+ 
+ #endif  /* GCC_CCMP_H  */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 4d91ce8cf..0aa516719 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2017,6 +2017,10 @@ fira-verbose=
+ Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
+ -fira-verbose=<number>	Control IRA's level of diagnostic messages.
+ 
++fccmp2
++Common Var(flag_ccmp2) Init(0) Optimization
++Optimize potential ccmp instruction in complex scenarios.
++
+ fivopts
+ Common Var(flag_ivopts) Init(1) Optimization
+ Optimize induction variables on trees.
+diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+new file mode 100644
+index 000000000..b509ba810
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
++
++int func (int a, int b, int c)
++{
++  while(1)
++    {
++      if(a-- == 0 || b >= c)
++	{
++	  return 1;
++	}
++    }
++}
++
++/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
+diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc
+index dccf41ab8..195e06428 100644
+--- a/gcc/tree-ssa-coalesce.cc
++++ b/gcc/tree-ssa-coalesce.cc
+@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3.  If not see
+ #include "explow.h"
+ #include "tree-dfa.h"
+ #include "stor-layout.h"
++#include "ccmp.h"
++#include "target.h"
++#include "tree-outof-ssa.h"
+ 
+ /* This set of routines implements a coalesce_list.  This is an object which
+    is used to track pairs of ssa_names which are desirable to coalesce
+@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
+   bitmap_clear (&ptr->live_base_var);
+ }
+ 
++/* Return true if gimple is a copy assignment.  */
++
++static inline bool
++gimple_is_assign_copy_p (gimple *gs)
++{
++  return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
++	  && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
++	  && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
++}
++
++#define MAX_CCMP_CONFLICT_NUM 5
++
++/* Clear high-cost conflict graphs.  */
++
++static void
++remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
++{
++  unsigned x = 0;
++  int add_conflict_num = 0;
++  bitmap b;
++  FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
++    {
++      if (b)
++	{
++	  add_conflict_num++;
++	}
++    }
++  if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
++    {
++      conflict_graph->conflicts.release ();
++    }
++}
++
++/* Adding a new conflict graph to the original graph.  */
++
++static void
++process_add_graph (live_track *live, basic_block bb,
++		   ssa_conflicts *conflict_graph)
++{
++  tree use, def;
++  ssa_op_iter iter;
++  gimple *first_visit_stmt = NULL;
++  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++       gsi_next (&gsi))
++    {
++      if (gimple_visited_p (gsi_stmt (gsi)))
++	{
++	  first_visit_stmt = gsi_stmt (gsi);
++	  break;
++	}
++    }
++  if (!first_visit_stmt)
++    return;
++
++  for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
++       gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++      if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt))
++	{
++	  continue;
++	}
++      if (gimple_is_assign_copy_p (stmt))
++	{

_service:tar_scm:0019-fp-model-Enable-fp-model-on-kunpeng.patch Added

@@ -0,0 +1,405 @@
+From 8cdb316a3fe205a3089b9c17aec0442f4d5f75be Mon Sep 17 00:00:00 2001
+From: bule <bule1@huawei.com>
+Date: Sun, 27 Aug 2023 16:49:04 +0800
+Subject: [PATCH 19/22] [fp-model] Enable fp-model on kunpeng
+
+Enable fp-model options on kunpeng for precision control.
+---
+ gcc/common.opt                     |  26 +++++
+ gcc/config/aarch64/aarch64-linux.h |   3 +-
+ gcc/flag-types.h                   |   9 ++
+ gcc/fortran/options.cc             |   8 ++
+ gcc/opts-common.cc                 | 146 ++++++++++++++++++++++++++++-
+ gcc/opts.cc                        |  68 ++++++++++++++
+ 6 files changed, 256 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 8a0dafc52..f5eef8a45 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1642,6 +1642,32 @@ ffp-int-builtin-inexact
+ Common Var(flag_fp_int_builtin_inexact) Init(1) Optimization
+ Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions.
+ 
++fftz
++Common Var(flag_ftz) Optimization
++Control fpcr register for flush to zero.
++
++fp-model=
++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization
++-fp-model=normal|fast|precise|except|strict Perform floating-point precision control.
++
++Enum
++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs)
++
++EnumValue
++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL)
++
++EnumValue
++Enum(fp_model) String(fast) Value(FP_MODEL_FAST)
++
++EnumValue
++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE)
++
++EnumValue
++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT)
++
++EnumValue
++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT)
++
+ ; Nonzero means don't put addresses of constant functions in registers.
+ ; Used for compiling the Unix kernel, where strange substitutions are
+ ; done on the assembly output.
+diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h
+index 5e4553d79..a5cba6391 100644
+--- a/gcc/config/aarch64/aarch64-linux.h
++++ b/gcc/config/aarch64/aarch64-linux.h
+@@ -50,7 +50,8 @@
+ #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
+ 
+ #define GNU_USER_TARGET_MATHFILE_SPEC \
+-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
++  "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\
++  %{!fno-ftz:crtfastmath.o%s}}"
+ 
+ #undef ENDFILE_SPEC
+ #define ENDFILE_SPEC   \
+diff --git a/gcc/flag-types.h b/gcc/flag-types.h
+index 2c8498169..64c64eb32 100644
+--- a/gcc/flag-types.h
++++ b/gcc/flag-types.h
+@@ -260,6 +260,15 @@ enum fp_contract_mode {
+   FP_CONTRACT_FAST = 2
+ };
+ 
++/* Floating-point precision mode.  */
++enum fp_model {
++  FP_MODEL_NORMAL = 0,
++  FP_MODEL_FAST = 1,
++  FP_MODEL_PRECISE = 2,
++  FP_MODEL_EXCEPT = 3,
++  FP_MODEL_STRICT = 4
++};
++
+ /* Scalar storage order kind.  */
+ enum scalar_storage_order_kind {
+   SSO_NATIVE = 0,
+diff --git a/gcc/fortran/options.cc b/gcc/fortran/options.cc
+index d0fa634f1..3eb99a84a 100644
+--- a/gcc/fortran/options.cc
++++ b/gcc/fortran/options.cc
+@@ -243,6 +243,7 @@ form_from_filename (const char *filename)
+   return f_form;
+ }
+ 
++static void gfc_handle_fpe_option (const char *arg, bool trap);
+ 
+ /* Finalize commandline options.  */
+ 
+@@ -286,6 +287,13 @@ gfc_post_options (const char **pfilename)
+   if (flag_protect_parens == -1)
+     flag_protect_parens = !optimize_fast;
+ 
++  /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary.  */
++  if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT)
++    {
++      gfc_handle_fpe_option ("all", false);
++      gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true);
++    }
++
+   /* -Ofast sets implies -fstack-arrays unless an explicit size is set for
+      stack arrays.  */
+   if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2)
+diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc
+index 7c07d5046..489a6e02a 100644
+--- a/gcc/opts-common.cc
++++ b/gcc/opts-common.cc
+@@ -28,7 +28,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "spellcheck.h"
+ #include "opts-jobserver.h"
+ 
+-static void prune_options (struct cl_decoded_option **, unsigned int *);
++static void prune_options (struct cl_decoded_option **, unsigned int *,
++			   unsigned int);
+ 
+ /* An option that is undocumented, that takes a joined argument, and
+    that doesn't fit any of the classes of uses (language/common,
+@@ -1091,7 +1092,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
+ 
+   *decoded_options = opt_array;
+   *decoded_options_count = num_decoded_options;
+-  prune_options (decoded_options, decoded_options_count);
++  prune_options (decoded_options, decoded_options_count, lang_mask);
+ }
+ 
+ /* Return true if NEXT_OPT_IDX cancels OPT_IDX.  Return false if the
+@@ -1112,11 +1113,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx)
+   return false;
+ }
+ 
++/* Check whether opt_idx exists in decoded_options array between index
++   start and end.  If found, return its index in decoded_options,
++   else return end.  */
++static unsigned int
++find_opt_idx (const struct cl_decoded_option *decoded_options,
++	       unsigned int decoded_options_count,
++	       unsigned int start, unsigned int end, unsigned int opt_idx)
++{
++  gcc_assert (end <= decoded_options_count);
++  gcc_assert (opt_idx < cl_options_count);
++  unsigned int k;
++  for (k = start; k < end; k++)
++    {
++      if (decoded_options[k].opt_index == opt_idx)
++	{
++	  return k;
++	}
++    }
++  return k;
++}
++
++/* remove the opt_index element from decoded_options array.  */
++static unsigned int
++remove_option (struct cl_decoded_option *decoded_options,
++	       unsigned int decoded_options_count,
++	       unsigned int opt_index)
++{
++  gcc_assert (opt_index < decoded_options_count);
++  unsigned int i;
++  for (i = opt_index; i < decoded_options_count - 1; i++)
++    {
++      decoded_options[i] = decoded_options[i + 1];
++    }
++  return decoded_options_count - 1;
++}
++
++/* Handle the priority between fp-model, Ofast, and
++   ffast-math.  */
++static unsigned int
++handle_fp_model_driver (struct cl_decoded_option *decoded_options,
++			unsigned int decoded_options_count,
++			unsigned int fp_model_index,
++			unsigned int lang_mask)
++{
++  struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index];
++  enum fp_model model = (enum fp_model) fp_model_opt.value;
++  if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT)
++    {
++      /* If found Ofast, override Ofast with O3.  */
++      unsigned int Ofast_index;
++      Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
++				  0, decoded_options_count, OPT_Ofast);
++      while (Ofast_index != decoded_options_count)
++	{
++	  const char *tmp_argv = "-O3";
++	  decode_cmdline_option (&tmp_argv, lang_mask,
++				 &decoded_options[Ofast_index]);
++	  warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs",
++		   fp_model_opt.orig_option_with_args_text);
++	  Ofast_index = find_opt_idx (decoded_options, decoded_options_count,

_service:tar_scm:0020-simdmath-Enable-simdmath-on-kunpeng.patch Added

@@ -0,0 +1,317 @@
+From 49ad10199dbdda2c36850a2617f5c985977939c5 Mon Sep 17 00:00:00 2001
+From: bule <bule1@huawei.com>
+Date: Sun, 27 Aug 2023 16:49:42 +0800
+Subject: [PATCH 20/22] [simdmath] Enable simdmath on kunpeng
+
+This enable simd math function supported by libmathlib on fortran/c/c++.
+Use -fsimdmath to turn on the generation of simdmath function. The
+supported functions can be found in simdmath.h. Add more simd declaration
+if you need more kinds of math functions. -msimdmath-64 is used to turn
+on 64-bit simd math functions which is not supported by libmathlib.
+Therefore, this option is default to off.
+---
+ gcc/c-family/c-opts.cc                        |  4 ++
+ gcc/common.opt                                |  4 ++
+ gcc/config/aarch64/aarch64.cc                 |  9 ++++-
+ gcc/config/aarch64/aarch64.opt                |  6 +++
+ gcc/fortran/scanner.cc                        |  3 ++
+ gcc/opts.cc                                   | 17 ++++++++
+ .../gcc.target/aarch64/simd_pcs_attribute-3.c |  2 +-
+ libgomp/Makefile.am                           |  4 +-
+ libgomp/Makefile.in                           | 10 +++--
+ libgomp/configure                             |  4 +-
+ libgomp/configure.ac                          |  2 +-
+ libgomp/simdmath.h.in                         | 40 +++++++++++++++++++
+ libgomp/simdmath_f.h.in                       | 11 +++++
+ 13 files changed, 106 insertions(+), 10 deletions(-)
+ create mode 100644 libgomp/simdmath.h.in
+ create mode 100644 libgomp/simdmath_f.h.in
+
+diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
+index a341a0617..5134f6128 100644
+--- a/gcc/c-family/c-opts.cc
++++ b/gcc/c-family/c-opts.cc
+@@ -801,6 +801,10 @@ c_common_post_options (const char **pfilename)
+   if (cpp_opts->deps.style == DEPS_NONE)
+     check_deps_environment_vars ();
+ 
++  if (flag_simdmath)
++    {
++      defer_opt (OPT_include, "simdmath.h");
++    }
+   handle_deferred_opts ();
+ 
+   sanitize_cpp_opts ();
+diff --git a/gcc/common.opt b/gcc/common.opt
+index f5eef8a45..e9d580957 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2125,6 +2125,10 @@ fmath-errno
+ Common Var(flag_errno_math) Init(1) Optimization SetByCombined
+ Set errno after built-in math functions.
+ 
++fsimdmath
++Common Var(flag_simdmath) Init(0) Optimization
++Enable auto-vectorize math functions for mathlib.  This option will turn on -fno-math-errno and -fopenmp-simd.
++
+ fmax-errors=
+ Common Joined RejectNegative UInteger Var(flag_max_errors)
+ -fmax-errors=<number>	Maximum number of errors to report.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 226dc9dff..a3da4ca30 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -26904,8 +26904,13 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+   elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+   if (known_eq (clonei->simdlen, 0U))
+     {
+-      count = 2;
+-      vec_bits = (num == 0 ? 64 : 128);
++      /* Currently mathlib or sleef hasn't provide function for V2SF mode
++      simdclone of single precision functions. (e.g._ZCVnN2v_expf)
++      Therefore this mode is disabled by default to avoid link error.
++      Use -msimdmath-64 option to enable this mode.  */
++      count = flag_simdmath_64 ? 2 : 1;
++      vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128);
++
+       clonei->simdlen = exact_div (vec_bits, elt_bits);
+     }
+   else
+diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
+index 92220b26e..a64b927e9 100644
+--- a/gcc/config/aarch64/aarch64.opt
++++ b/gcc/config/aarch64/aarch64.opt
+@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for
+ single precision and to 32 bits for double precision.
+ If enabled, it implies -mlow-precision-recip-sqrt.
+ 
++msimdmath-64
++Target Var(flag_simdmath_64) Optimization
++Allow compiler to generate V2SF 64 bits simdclone of math functions,
++which is not currently supported in mathlib or sleef.
++Therefore this option is disabled by default.
++
+ mlow-precision-div
+ Target Var(flag_mlow_precision_div) Optimization
+ Enable the division approximation.  Enabling this reduces
+diff --git a/gcc/fortran/scanner.cc b/gcc/fortran/scanner.cc
+index 2dff25147..63e262f51 100644
+--- a/gcc/fortran/scanner.cc
++++ b/gcc/fortran/scanner.cc
+@@ -2769,6 +2769,9 @@ gfc_new_file (void)
+   if (flag_pre_include != NULL)
+     load_file (flag_pre_include, NULL, false);
+ 
++  if (flag_simdmath)
++    load_file ("simdmath_f.h", NULL, false);
++
+   if (gfc_cpp_enabled ())
+     {
+       gfc_cpp_preprocess (gfc_source_file);
+diff --git a/gcc/opts.cc b/gcc/opts.cc
+index b522ed7e2..c3cc2c169 100644
+--- a/gcc/opts.cc
++++ b/gcc/opts.cc
+ static const char undocumented_msg[] = N_("This option lacks documentation.");
+ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.");
+ 
+ typedef char *char_p; /* For DEF_VEC_P.  */
++static void set_simdmath_flags (struct gcc_options *opts, int set);
+ 
+ static void set_debug_level (uint32_t dinfo, int extended,
+ 			     const char *arg, struct gcc_options *opts,
+@@ -2850,6 +2851,10 @@ common_handle_option (struct gcc_options *opts,
+       dc->min_margin_width = value;
+       break;
+ 
++    case OPT_fsimdmath:
++      set_simdmath_flags (opts, value);
++      break;
++
+     case OPT_fdump_:
+       /* Deferred.  */
+       break;
+@@ -3227,6 +3232,18 @@ common_handle_option (struct gcc_options *opts,
+   return true;
+ }
+ 
++/* The following routines are used to set -fno-math-errno and -fopenmp-simd
++   to enable vector mathlib.  */
++static void
++set_simdmath_flags (struct gcc_options *opts, int set)
++{
++  if (set)
++    {
++      opts->x_flag_errno_math = 0;
++      opts->x_flag_openmp_simd = 1;
++    }
++}
++
+ /* Used to set the level of strict aliasing warnings in OPTS,
+    when no level is specified (i.e., when -Wstrict-aliasing, and not
+    -Wstrict-aliasing=level was given).
+diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
+index 95f6a6803..e0e0efa9d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
++++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-Ofast" } */
++/* { dg-options "-Ofast -msimdmath-64" } */
+ 
+ __attribute__ ((__simd__))
+ __attribute__ ((__nothrow__ , __leaf__ , __const__))
+diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
+index f8b2a06d6..8dfa160d6 100644
+--- a/libgomp/Makefile.am
++++ b/libgomp/Makefile.am
+@@ -75,10 +75,10 @@ libgomp_la_SOURCES += openacc.f90
+ endif
+ 
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
+ if USE_FORTRAN
+ nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+-	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
++	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
+ endif
+ 
+ LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
+diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
+index 6f0cb7161..90fc326f0 100644
+--- a/libgomp/Makefile.in
++++ b/libgomp/Makefile.in
+@@ -147,7 +147,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+  configure.lineno config.status.lineno
+ mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
+ CONFIG_HEADER = config.h
+-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
+ 	libgomp.spec
+ CONFIG_CLEAN_VPATH_FILES =
+ am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+@@ -583,9 +583,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
+ @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS)
+ @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h

_service:tar_scm:0021-StructReorderFields-Structure-reorder-fields.patch Added

@@ -0,0 +1,5739 @@
+From 6997c9ad8985f6f0bfc16cdb46e7386af299a226 Mon Sep 17 00:00:00 2001
+From: h00564365 <huangxiaoquan1@huawei.com>
+Date: Mon, 31 Jul 2023 22:01:56 +0800
+Subject: [PATCH 21/22] [StructReorderFields] Structure reorder fields
+
+Introduce structure fields reordering optimization, that change
+fields ordering of C-like structures in order to better utilize spatial
+locality.
+---
+ gcc/common.opt                                |    4 +
+ gcc/doc/invoke.texi                           |    1 +
+ gcc/gimple-ssa-warn-access.cc                 |    2 +-
+ gcc/ipa-free-lang-data.cc                     |    4 +-
+ gcc/ipa-struct-reorg/escapes.def              |    3 +
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 2545 +++++++++++++----
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   14 +-
+ gcc/passes.def                                |    1 +
+ gcc/symbol-summary.h                          |    4 +-
+ .../struct/rf_DTE_struct_instance_field.c     |   75 +
+ gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c   |   94 +
+ .../gcc.dg/struct/rf_check_ptr_layers_bug.c   |   24 +
+ .../gcc.dg/struct/rf_create_fields_bug.c      |   82 +
+ .../gcc.dg/struct/rf_create_new_func_bug.c    |   56 +
+ .../gcc.dg/struct/rf_ele_minus_verify.c       |   60 +
+ .../gcc.dg/struct/rf_escape_by_base.c         |   83 +
+ .../gcc.dg/struct/rf_external_func_types.c    |   69 +
+ gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c |   72 +
+ .../gcc.dg/struct/rf_mem_ref_offset.c         |   58 +
+ .../struct/rf_mul_layer_ptr_record_bug.c      |   30 +
+ .../gcc.dg/struct/rf_pass_conflict.c          |  109 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c |   87 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c     |   71 +
+ .../gcc.dg/struct/rf_ptr_negate_expr.c        |   55 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c   |   34 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c      |   55 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c  |   58 +
+ .../gcc.dg/struct/rf_rescusive_type.c         |   57 +
+ .../struct/rf_rewrite_assign_more_cmp.c       |   65 +
+ .../gcc.dg/struct/rf_rewrite_cond_bug.c       |   72 +
+ .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c  |   58 +
+ .../gcc.dg/struct/rf_rewrite_phi_bug.c        |   81 +
+ gcc/testsuite/gcc.dg/struct/rf_shwi.c         |   23 +
+ gcc/testsuite/gcc.dg/struct/rf_visible_func.c |   92 +
+ .../gcc.dg/struct/rf_void_ptr_param_func.c    |   54 +
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   15 +-
+ gcc/testsuite/gcc.dg/struct/struct_reorg-1.c  |    8 +-
+ gcc/testsuite/gcc.dg/struct/struct_reorg-3.c  |    9 +-
+ gcc/timevar.def                               |    1 +
+ gcc/tree-pass.h                               |    1 +
+ 40 files changed, 3796 insertions(+), 490 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_external_func_types.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_shwi.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 0c7bd2f6c..98169de7c 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1954,6 +1954,10 @@ fipa-matrix-reorg
+ Common Ignore
+ Does nothing. Preserved for backward compatibility.
+ 
++fipa-reorder-fields
++Common Var(flag_ipa_reorder_fields) Init(0) Optimization
++Perform structure fields reorder optimizations.
++
+ fipa-struct-reorg
+ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 3485cc8af..2b376e0e9 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}.
+ -finline-functions  -finline-functions-called-once  -finline-limit=@var{n} @gol
+ -finline-small-functions -fipa-modref -fipa-cp  -fipa-cp-clone @gol
+ -fipa-bit-cp  -fipa-vrp  -fipa-pta  -fipa-profile  -fipa-pure-const @gol
++-fipa-reorder-fields @gol
+ -fipa-struct-reorg @gol
+ -fipa-reference  -fipa-reference-addressable @gol
+ -fipa-stack-alignment  -fipa-icf  -fira-algorithm=@var{algorithm} @gol
+diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
+index a24645783..7f5c92c96 100644
+--- a/gcc/gimple-ssa-warn-access.cc
++++ b/gcc/gimple-ssa-warn-access.cc
+@@ -2198,7 +2198,7 @@ pass_waccess::gate (function *)
+      In pass waccess, it will traverse all SSA and cause ICE
+      when handling these unused SSA.  So temporarily disable
+      pass waccess when enable structure optimizations.  */
+-  if (flag_ipa_struct_reorg)
++  if (flag_ipa_struct_reorg || flag_ipa_reorder_fields)
+     return false;
+ 
+   return (warn_free_nonheap_object
+diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
+index 5450be9fe..a88381ddb 100644
+--- a/gcc/ipa-free-lang-data.cc
++++ b/gcc/ipa-free-lang-data.cc
+@@ -105,7 +105,7 @@ fld_simplified_type_name (tree type)
+   /* Simplify type will cause that struct A and struct A within
+      struct B are different type pointers, so skip it in structure
+      optimizations.  */
+-  if (flag_ipa_struct_reorg)
++  if (flag_ipa_struct_reorg || flag_ipa_reorder_fields)
+     return TYPE_NAME (type);
+ 
+   if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
+@@ -349,7 +349,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
+   /* Simplify type will cause that struct A and struct A within
+      struct B are different type pointers, so skip it in structure
+      optimizations.  */
+-  if (flag_ipa_struct_reorg)
++  if (flag_ipa_struct_reorg || flag_ipa_reorder_fields)
+     return t;
+   if (POINTER_TYPE_P (t))
+     return fld_incomplete_type_of (t, fld);
+diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def
+index d825eb3e6..996a09bac 100644
+--- a/gcc/ipa-struct-reorg/escapes.def
++++ b/gcc/ipa-struct-reorg/escapes.def
+@@ -58,5 +58,8 @@ DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer not handled
+ DEF_ESCAPE (escape_return, "Type escapes via a return not handled yet")
+ DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance")
+ DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt")
++DEF_ESCAPE (escape_via_orig_escape, "Type escapes via a original escape type")
++DEF_ESCAPE (escape_instance_field, "Type escapes via a field of instance")
++DEF_ESCAPE (escape_via_empty_no_orig, "Type escapes via empty and no original")
+ 
+ #undef DEF_ESCAPE
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 9f790b28b..3e5f9538b 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -207,50 +207,88 @@ lang_c_p (void)
+   if (!language_string)
+     return false;
+ 
+-  if (strcmp (language_string, "GNU GIMPLE") == 0)
++  if (lang_GNU_C ())
++    return true;
++  else if (strcmp (language_string, "GNU GIMPLE") == 0) // For LTO check
+     {
+       unsigned i = 0;
+-      tree t = NULL;
+-      const char *unit_string = NULL;
++      tree t = NULL_TREE;
+ 
+       FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
+ 	{
+-	  unit_string = TRANSLATION_UNIT_LANGUAGE (t);
+-	  if (!unit_string
+-	      || (strncmp (unit_string, "GNU C", 5) != 0)
+-	      || (!ISDIGIT (unit_string[5])))
++	  language_string = TRANSLATION_UNIT_LANGUAGE (t);
++	  if (language_string == NULL
++	      || strncmp (language_string, "GNU C", 5)
++	      || (language_string[5] != '\0'
++		  && !(ISDIGIT (language_string[5]))))
+ 	    return false;
+ 	}
+       return true;
+     }
+-  else if (strncmp (language_string, "GNU C", 5) == 0
+-	   && ISDIGIT (language_string[5]))
+-    return true;
+-
+   return false;
+ }
+ 
++/* Get the number of pointer layers.  */
++
++int
++get_ptr_layers (tree expr)
++{
++  int layers = 0;
++  while (POINTER_TYPE_P (expr) || TREE_CODE (expr) == ARRAY_TYPE)

_service:tar_scm:0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch Added

@@ -0,0 +1,1753 @@
+From 9d03b0a7741915e3a0172d60b9c21bf5abbda89e Mon Sep 17 00:00:00 2001
+From: Mingchuan Wu <wumingchuan1992@foxmail.com>
+Date: Mon, 28 Aug 2023 18:11:02 +0800
+Subject: [PATCH 22/22] [DFE] Add Dead Field Elimination in Struct-Reorg.
+
+We can transform gimple to eliminate fields that are never read
+and replace the dead fields in stmt by creating a new ssa.
+---
+ gcc/common.opt                                |   4 +
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 240 +++++++++++++++++-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   8 +
+ gcc/opts.cc                                   |  17 ++
+ gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c  |  86 +++++++
+ .../gcc.dg/struct/dfe_ele_minus_verify.c      |  60 +++++
+ .../gcc.dg/struct/dfe_extr_board_init.c       |  77 ++++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c   |  84 ++++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c |  56 ++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c     | 162 ++++++++++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c   | 126 +++++++++
+ .../gcc.dg/struct/dfe_extr_mv_udc_core.c      |  82 ++++++
+ .../gcc.dg/struct/dfe_extr_tcp_usrreq.c       |  58 +++++
+ .../gcc.dg/struct/dfe_extr_ui_main.c          |  61 +++++
+ .../gcc.dg/struct/dfe_mem_ref_offset.c        |  58 +++++
+ .../struct/dfe_mul_layer_ptr_record_bug.c     |  30 +++
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c    |  71 ++++++
+ .../gcc.dg/struct/dfe_ptr_negate_expr.c       |  55 ++++
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c     |  55 ++++
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   4 +
+ .../struct/wo_prof_escape_replace_type.c      |  49 ++++
+ 21 files changed, 1436 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 14633c821..8bb735551 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1988,6 +1988,10 @@ fipa-struct-reorg
+ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ 
++fipa-struct-reorg=
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3)
++-fipa-struct-reorg=0,1,2,3 adding none, struct-reorg, reorder-fields, dfe optimizations.
++
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+ Perform IPA Value Range Propagation.
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 3e5f9538b..eac5fac7e 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -87,6 +87,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "tree-pretty-print.h"
+ #include "gimple-pretty-print.h"
+ #include "gimple-iterator.h"
++#include "gimple-walk.h"
+ #include "cfg.h"
+ #include "ssa.h"
+ #include "tree-dfa.h"
+@@ -268,10 +269,43 @@ enum srmode
+   STRUCT_REORDER_FIELDS
+ };
+ 
++/* Enum the struct layout optimize level,
++   which should be the same as the option -fipa-struct-reorg=.  */
++
++enum struct_layout_opt_level
++{
++  NONE = 0,
++  STRUCT_REORG,
++  STRUCT_REORDER_FIELDS_SLO,
++  DEAD_FIELD_ELIMINATION
++};
++
+ static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
+ static bool isptrptr (tree type);
++void get_base (tree &base, tree expr);
+ 
+ srmode current_mode;
++hash_map<tree, tree> replace_type_map;
++
++/* Return true if one of these types is created by struct-reorg.  */
++
++static bool
++is_replace_type (tree type1, tree type2)
++{
++  if (replace_type_map.is_empty ())
++    return false;
++  if (type1 == NULL_TREE || type2 == NULL_TREE)
++    return false;
++  tree *type_value = replace_type_map.get (type1);
++  if (type_value)
++    if (types_compatible_p (*type_value, type2))
++      return true;
++  type_value = replace_type_map.get (type2);
++  if (type_value)
++    if (types_compatible_p (*type_value, type1))
++      return true;
++  return false;
++}
+ 
+ } // anon namespace
+ 
+@@ -353,7 +387,8 @@ srfield::srfield (tree field, srtype *base)
+     fielddecl (field),
+     base (base),
+     type (NULL),
+-    clusternum (0)
++    clusternum (0),
++    field_access (EMPTY_FIELD)
+ {
+   for (int i = 0; i < max_split; i++)
+     newfield[i] = NULL_TREE;
+@@ -392,6 +427,25 @@ srtype::srtype (tree type)
+     }
+ }
+ 
++/* Return true if any field in the RECORD_TYPE is never read.  */
++
++bool
++srtype::has_dead_field (void)
++{
++  bool may_dfe = false;
++  srfield *this_field;
++  unsigned i;
++  FOR_EACH_VEC_ELT (fields, i, this_field)
++    {
++      if (!(this_field->field_access & READ_FIELD))
++	{
++	  may_dfe = true;
++	  break;
++	}
++    }
++  return may_dfe;
++}
++
+ /* Mark the type as escaping type E at statement STMT.  */
+ 
+ void
+@@ -595,7 +649,17 @@ srtype::analyze (void)
+       into 2 different structures.  In future we intend to add profile
+       info and/or static heuristics to differentiate splitting process.  */
+   if (fields.length () == 2)
+-    fields[1]->clusternum = 1;
++    {
++      /* Currently, when the replacement structure type exists,
++	 we only split the replacement structure. */
++      for (hash_map<tree, tree>::iterator it = replace_type_map.begin ();
++	   it != replace_type_map.end (); ++it)
++	{
++	  if (types_compatible_p ((*it).second, this->type))
++	    return;
++	}
++      fields[1]->clusternum = 1;
++    }
+ 
+   /* Otherwise we do nothing.  */
+   if (fields.length () >= 3)
+@@ -838,6 +902,10 @@ srtype::create_new_type (void)
+   for (unsigned i = 0; i < fields.length (); i++)
+     {
+       srfield *f = fields[i];
++      if (current_mode == STRUCT_REORDER_FIELDS
++	  && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
++	  && !(f->field_access & READ_FIELD))
++	continue;
+       f->create_new_fields (newtype, newfields, newlast);
+     }
+ 
+@@ -856,6 +924,16 @@ srtype::create_new_type (void)
+ 
+   warn_padded = save_warn_padded;
+ 
++  if (current_mode == STRUCT_REORDER_FIELDS
++      && replace_type_map.get (this->newtype[0]) == NULL)
++    replace_type_map.put (this->newtype[0], this->type);
++  if (dump_file)
++    {
++      if (current_mode == STRUCT_REORDER_FIELDS
++	  && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
++	  && has_dead_field ())
++	fprintf (dump_file, "Dead field elimination.\n");
++    }
+   if (dump_file && (dump_flags & TDF_DETAILS))

_service Changed