Projects
openEuler:Mainline
gcc
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 19
View file
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 4 +%global gcc_release 8 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -18,7 +18,7 @@ %global build_go 0 %global build_d 0 -%ifarch %{ix86} x86_64 ia64 ppc64le +%ifarch %{ix86} x86_64 ia64 ppc64le aarch64 %global build_libquadmath 1 %else %global build_libquadmath 0 @@ -136,12 +136,27 @@ Provides: bundled(libffi) Provides: gcc(major) = %{gcc_major} -Patch0: 0000-Version-Set-version-to-12.3.1.patch -Patch1: 0001-CONFIG-Regenerate-configure-file.patch -Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch - -Patch8: 0008-RISCV-Inline-subword-atomic-ops.patch -Patch9: 0009-riscv-linux-Don-t-add-latomic-with-pthread.patch +Patch1: 0001-Version-Set-version-to-12.3.1.patch +Patch2: 0002-RISCV-Backport-inline-subword-atomic-patches.patch +Patch3: 0003-CONFIG-Regenerate-configure-file.patch +Patch4: 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch +Patch6: 0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch +Patch7: 0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch +Patch8: 0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch +Patch9: 0009-MULL64-Disable-mull64-transformation-by-default.patch +Patch10: 0010-Version-Clear-DATESTAMP_s.patch +Patch11: 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch +Patch12: 0012-Enable-small-loop-unrolling-for-O2.patch +Patch13: 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch +Patch14: 0014-Array-widen-compare-Add-a-new-optimization-for-array.patch +Patch15: 0015-Backport-Structure-reorganization-optimization.patch +Patch16: 0016-CompleteStructRelayout-Complete-Structure-Relayout.patch +Patch17: 0017-StructReorg-Some-bugfix-for-structure-reorganization.patch +Patch18: 0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch +Patch19: 
0019-fp-model-Enable-fp-model-on-kunpeng.patch +Patch20: 0020-simdmath-Enable-simdmath-on-kunpeng.patch +Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch +Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -609,12 +624,27 @@ %prep %setup -q -n gcc-12.3.0 -%patch0 -p1 %patch1 -p1 %patch2 -p1 - +%patch3 -p1 +%patch4 -p1 +%patch6 -p1 +%patch7 -p1 %patch8 -p1 %patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2718,6 +2748,26 @@ %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Aug 29 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-8 +- Type: Sync +- DESC: Sync patch from openeuler/gcc + +* Fri Aug 11 2023 Hongyu Wang <hongyu.wang@intel.com> 12.3.1-7 +- Type:Sync +- i386: Only enable small loop unrolling in backend PR 107692. + +* Fri Aug 11 2023 Hongyu Wang <hongyu.wang@intel.com> 12.3.1-6 +- Type:Sync +- Enable small loop unrolling for O2. + +* Fri Aug 11 2023 Cui,Lili <lili.cui@intel.com> 12.3.1-5 +- Type:Sync +- Add attribute hot judgement for INLINE_HINT_known_hot hint. + +* Mon Jul 17 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-4 +- Type:SPEC +- DESC:Enable libquadmath on kunpeng + * Fri Jul 14 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-3 - Type:Sync - DESC:Enable libquadmath on kunpeng
View file
_service:tar_scm:0001-CONFIG-Regenerate-configure-file.patch
Deleted
@@ -1,45 +0,0 @@ -From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001 -From: eastb233 <xiezhiheng@huawei.com> -Date: Fri, 14 Jul 2023 11:07:05 +0800 -Subject: PATCH 1/2 CONFIG Regenerate configure file - -Regenerate configure file under libquadmath directory -since it is out of date. ---- - libquadmath/configure | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/libquadmath/configure b/libquadmath/configure -index b3ee64f9c..603f2f131 100755 ---- a/libquadmath/configure -+++ b/libquadmath/configure -@@ -10806,7 +10806,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 10819 "configure" -+#line 10809 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -10912,7 +10912,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 10925 "configure" -+#line 10915 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -13031,7 +13031,7 @@ case "$host" in - case "$enable_cet" in - auto) - # Check if target supports multi-byte NOPs -- # and if assembler supports CET insn. -+ # and if compiler and assembler support CET insn. - cet_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fcf-protection" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext --- -2.33.0 -
View file
_service:tar_scm:0000-Version-Set-version-to-12.3.1.patch -> _service:tar_scm:0001-Version-Set-version-to-12.3.1.patch
Changed
@@ -1,7 +1,7 @@ From 73ee6351353b036f466ba1aab9a9e7d7865bf972 Mon Sep 17 00:00:00 2001 From: eastb233 <xiezhiheng@huawei.com> Date: Tue, 11 Jul 2023 16:07:51 +0800 -Subject: PATCH Version Set version to 12.3.1 +Subject: PATCH 01/22 Version Set version to 12.3.1 --- gcc/BASE-VER | 2 +-
View file
_service:tar_scm:0002-RISCV-Backport-inline-subword-atomic-patches.patch
Added
@@ -0,0 +1,2042 @@ +From 123615a0aac59a731516ef11c1fe433d39b6573a Mon Sep 17 00:00:00 2001 +From: laokz <zhangkai@iscas.ac.cn> +Date: Tue, 11 Jul 2023 21:03:14 +0800 +Subject: PATCH 02/22 RISCV Backport inline subword atomic patches + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f797260adaf52bee0ec0e16190bbefbe1bfc3692 + +203f3060dd363361b172f7295f42bb6bf5ac0b3b +--- + gcc/config/riscv/linux.h | 10 - + gcc/config/riscv/riscv-protos.h | 2 + + gcc/config/riscv/riscv.cc | 49 ++ + gcc/config/riscv/riscv.opt | 4 + + gcc/config/riscv/sync.md | 301 +++++++++ + gcc/doc/invoke.texi | 10 +- + .../gcc.target/riscv/inline-atomics-1.c | 18 + + .../gcc.target/riscv/inline-atomics-2.c | 9 + + .../gcc.target/riscv/inline-atomics-3.c | 569 ++++++++++++++++++ + .../gcc.target/riscv/inline-atomics-4.c | 566 +++++++++++++++++ + .../gcc.target/riscv/inline-atomics-5.c | 87 +++ + .../gcc.target/riscv/inline-atomics-6.c | 87 +++ + .../gcc.target/riscv/inline-atomics-7.c | 69 +++ + .../gcc.target/riscv/inline-atomics-8.c | 69 +++ + libgcc/config/riscv/atomic.c | 2 + + 15 files changed, 1841 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c + +diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h +index 38803723b..b5c6c5027 100644 +--- a/gcc/config/riscv/linux.h ++++ b/gcc/config/riscv/linux.h +@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3. 
If not see + #undef MUSL_DYNAMIC_LINKER + #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1" + +-/* Because RISC-V only has word-sized atomics, it requries libatomic where +- others do not. So link libatomic by default, as needed. */ +-#undef LIB_SPEC +-#ifdef LD_AS_NEEDED_OPTION +-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \ +- " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}" +-#else +-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic " +-#endif +- + #define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + + #define CPP_SPEC "%{pthread:-D_REENTRANT}" +diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h +index 65bb85f55..3b039e00d 100644 +--- a/gcc/config/riscv/riscv-protos.h ++++ b/gcc/config/riscv/riscv-protos.h +@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx); + extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); + extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); + extern bool riscv_gpr_save_operation_p (rtx); ++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *); ++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *); + + /* Routines implemented in riscv-c.cc. */ + void riscv_cpu_cpp_builtins (cpp_reader *); +diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc +index 4939d9964..9cf79beba 100644 +--- a/gcc/config/riscv/riscv.cc ++++ b/gcc/config/riscv/riscv.cc +@@ -5605,6 +5605,55 @@ riscv_asan_shadow_offset (void) + return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0; + } + ++/* Given memory reference MEM, expand code to compute the aligned ++ memory address, shift and mask values and store them into ++ *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */ ++ ++void ++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, ++ rtx *not_mask) ++{ ++ /* Align the memory address to a word. 
*/ ++ rtx addr = force_reg (Pmode, XEXP (mem, 0)); ++ ++ rtx addr_mask = gen_int_mode (-4, Pmode); ++ ++ rtx aligned_addr = gen_reg_rtx (Pmode); ++ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask)); ++ ++ *aligned_mem = change_address (mem, SImode, aligned_addr); ++ ++ /* Calculate the shift amount. */ ++ emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), ++ gen_int_mode (3, SImode))); ++ emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift, ++ gen_int_mode (3, SImode))); ++ ++ /* Calculate the mask. */ ++ int unshifted_mask = GET_MODE_MASK (GET_MODE (mem)); ++ ++ emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode)); ++ ++ emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask, ++ gen_lowpart (QImode, *shift))); ++ ++ emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask)); ++} ++ ++/* Leftshift a subword within an SImode register. */ ++ ++void ++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift, ++ rtx *shifted_value) ++{ ++ rtx value_reg = gen_reg_rtx (SImode); ++ emit_move_insn (value_reg, simplify_gen_subreg (SImode, value, ++ mode, 0)); ++ ++ emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg, ++ gen_lowpart (QImode, shift))); ++} ++ + /* Initialize the GCC target structure. */ + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt +index 492aad123..328d848d6 100644 +--- a/gcc/config/riscv/riscv.opt ++++ b/gcc/config/riscv/riscv.opt +@@ -225,3 +225,7 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213) + misa-spec= + Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC) + Set the version of RISC-V ISA spec. ++ ++minline-atomics ++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1) ++Always inline subword atomic operations. 
+diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md +index 86b41e6b0..9c4fbabc6 100644 +--- a/gcc/config/riscv/sync.md ++++ b/gcc/config/riscv/sync.md +@@ -21,8 +21,11 @@ + + (define_c_enum "unspec" + UNSPEC_COMPARE_AND_SWAP ++ UNSPEC_COMPARE_AND_SWAP_SUBWORD + UNSPEC_SYNC_OLD_OP ++ UNSPEC_SYNC_OLD_OP_SUBWORD + UNSPEC_SYNC_EXCHANGE ++ UNSPEC_SYNC_EXCHANGE_SUBWORD + UNSPEC_ATOMIC_STORE + UNSPEC_MEMORY_BARRIER + ) +@@ -92,6 +95,135 @@ + "%F3amo<insn>.<amo>%A3 %0,%z2,%1" + (set (attr "length") (const_int 8))) + ++(define_insn "subword_atomic_fetch_strong_<atomic_optab>" ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ (any_atomic:SI (match_dup 1) ++ (match_operand:SI 2 "register_operand" "rI")) ;; value for op ++ (match_operand:SI 3 "register_operand" "rI") ;; mask ++ UNSPEC_SYNC_OLD_OP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 ++ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_2 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "<insn>\t%5, %0, %2\;" ++ "and\t%5, %5, %3\;" ++ "and\t%6, %0, %4\;" ++ "or\t%6, %6, %5\;" ++ "sc.w.rl\t%5, %6, %1\;" ++ "bnez\t%5, 1b"; ++ } ++ (set (attr "length") (const_int 28))) ++ ++(define_expand "atomic_fetch_nand<mode>" ++ (match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op ++ (match_operand:SI 3 "const_int_operand") ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_fetch_strong_nand to implement a LR/SC version of the ++ operation. 
*/ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands1; ++ rtx value = operands2; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); ++ ++ emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem, ++ shifted_value, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); ++ ++ DONE; ++}) ++ ++(define_insn "subword_atomic_fetch_strong_nand" ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ (not:SI (and:SI (match_dup 1) ++ (match_operand:SI 2 "register_operand" "rI"))) ;; value for op ++ (match_operand:SI 3 "register_operand" "rI") ;; mask ++ UNSPEC_SYNC_OLD_OP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 ++ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_2 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%5, %0, %2\;" ++ "not\t%5, %5\;" ++ "and\t%5, %5, %3\;" ++ "and\t%6, %0, %4\;" ++ "or\t%6, %6, %5\;" ++ "sc.w.rl\t%5, %6, %1\;" ++ "bnez\t%5, 1b"; ++ } ++ (set (attr "length") (const_int 32))) ++ ++(define_expand "atomic_fetch_<atomic_optab><mode>" ++ (match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op ++ (match_operand:SI 3 
"const_int_operand") ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_fetch_strong_<mode> to implement a LR/SC version of the ++ operation. */ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands1; ++ rtx value = operands2; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); ++ ++ emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem, ++ shifted_value, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); ++ ++ DONE; ++}) ++ + (define_insn "atomic_exchange<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") + (unspec_volatile:GPR +@@ -104,6 +236,56 @@ + "%F3amoswap.<amo>%A3 %0,%z2,%1" + (set (attr "length") (const_int 8))) + ++(define_expand "atomic_exchange<mode>" ++ (match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "register_operand") ;; value ++ (match_operand:SI 3 "const_int_operand") ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands1; ++ rtx value = operands2; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ 
riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); ++ ++ emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem, ++ shifted_value, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); ++ DONE; ++}) ++ ++(define_insn "subword_atomic_exchange_strong" ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ (match_operand:SI 2 "reg_or_0_operand" "rI") ;; value ++ (match_operand:SI 3 "reg_or_0_operand" "rI") ;; not_mask ++ UNSPEC_SYNC_EXCHANGE_SUBWORD)) ++ (clobber (match_scratch:SI 4 "=&r")) ;; tmp_1 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%4, %0, %3\;" ++ "or\t%4, %4, %2\;" ++ "sc.w.rl\t%4, %4, %1\;" ++ "bnez\t%4, 1b"; ++ } ++ (set (attr "length") (const_int 20))) ++ + (define_insn "atomic_cas_value_strong<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+A")) +@@ -152,6 +334,125 @@ + DONE; + }) + ++(define_expand "atomic_compare_and_swap<mode>" ++ (match_operand:SI 0 "register_operand") ;; bool output ++ (match_operand:SHORT 1 "register_operand") ;; val output ++ (match_operand:SHORT 2 "memory_operand") ;; memory ++ (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value ++ (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; mod_s ++ (match_operand:SI 7 "const_int_operand") ;; mod_f ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ emit_insn (gen_atomic_cas_value_strong<mode> (operands1, operands2, ++ operands3, operands4, ++ operands6, operands7)); ++ ++ rtx val = gen_reg_rtx (SImode); ++ if (operands1 != const0_rtx) ++ emit_move_insn (val, gen_rtx_SIGN_EXTEND 
(SImode, operands1)); ++ else ++ emit_move_insn (val, const0_rtx); ++ ++ rtx exp = gen_reg_rtx (SImode); ++ if (operands3 != const0_rtx) ++ emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands3)); ++ else ++ emit_move_insn (exp, const0_rtx); ++ ++ rtx compare = val; ++ if (exp != const0_rtx) ++ { ++ rtx difference = gen_rtx_MINUS (SImode, val, exp); ++ compare = gen_reg_rtx (SImode); ++ emit_move_insn (compare, difference); ++ } ++ ++ if (word_mode != SImode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)); ++ compare = reg; ++ } ++ ++ emit_move_insn (operands0, gen_rtx_EQ (SImode, compare, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "atomic_cas_value_strong<mode>" ++ (match_operand:SHORT 0 "register_operand") ;; val output ++ (match_operand:SHORT 1 "memory_operand") ;; memory ++ (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value ++ (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ (match_operand:SI 5 "const_int_operand") ;; mod_f ++ (match_scratch:SHORT 6) ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_cas_strong<mode> to implement a LR/SC version of the ++ operation. 
*/ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands1; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx o = operands2; ++ rtx n = operands3; ++ rtx shifted_o = gen_reg_rtx (SImode); ++ rtx shifted_n = gen_reg_rtx (SImode); ++ ++ riscv_lshift_subword (<MODE>mode, o, shift, &shifted_o); ++ riscv_lshift_subword (<MODE>mode, n, shift, &shifted_n); ++ ++ emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask)); ++ emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask)); ++ ++ emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem, ++ shifted_o, shifted_n, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); ++ ++ DONE; ++}) ++ ++(define_insn "subword_atomic_cas_strong" ++ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI (match_operand:SI 2 "reg_or_0_operand" "rJ") ;; expected value ++ (match_operand:SI 3 "reg_or_0_operand" "rJ") ;; desired value ++ UNSPEC_COMPARE_AND_SWAP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; mask ++ (match_operand:SI 5 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_1 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%6, %0, %4\;" ++ "bne\t%6, %z2, 1f\;" ++ "and\t%6, %0, %5\;" ++ "or\t%6, %6, %3\;" ++ "sc.w.rl\t%6, %6, %1\;" ++ "bnez\t%6, 1b\;" ++ "1:"; ++ } ++ (set (attr "length") (const_int 28))) ++ + (define_expand "atomic_test_and_set" + (match_operand:QI 0 
"register_operand" "") ;; bool output + (match_operand:QI 1 "memory_operand" "+A") ;; memory +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index cb83dd8a1..ff8cd032f 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1210,7 +1210,8 @@ See RS/6000 and PowerPC Options. + -malign-data=@var{type} @gol + -mbig-endian -mlittle-endian @gol + -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol +--mstack-protector-guard-offset=@var{offset}} ++-mstack-protector-guard-offset=@var{offset} ++-minline-atomics -mno-inline-atomics} + + @emph{RL78 Options} + @gccoptlist{-msim -mmul=none -mmul=g13 -mmul=g14 -mallregs @gol +@@ -28035,6 +28036,13 @@ Do or don't use smaller but slower prologue and epilogue code that uses + library function calls. The default is to use fast inline prologues and + epilogues. + ++@opindex minline-atomics ++@item -minline-atomics ++@itemx -mno-inline-atomics ++Do or don't use smaller but slower subword atomic emulation code that uses ++libatomic function calls. The default is to use fast inline subword atomics ++that do not require libatomic. 
++ + @item -mshorten-memrefs + @itemx -mno-shorten-memrefs + @opindex mshorten-memrefs +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c +new file mode 100644 +index 000000000..5c5623d9b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mno-inline-atomics" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */ ++ ++char foo; ++char bar; ++char baz; ++ ++int ++main () ++{ ++ __sync_fetch_and_add(&foo, 1); ++ __sync_fetch_and_nand(&bar, 1); ++ __sync_bool_compare_and_swap (&baz, 1, 2); ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c +new file mode 100644 +index 000000000..01b439086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* Verify that subword atomics do not generate calls. 
*/ ++/* { dg-options "-minline-atomics" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */ ++ ++#include "inline-atomics-1.c" +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c +new file mode 100644 +index 000000000..709f37343 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c +@@ -0,0 +1,569 @@ ++/* Check all char alignments. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-1.c */ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ ++ ++/* Test the execution of the __atomic_*OP builtin routines for a char. */ ++ ++extern void abort(void); ++ ++char count, res; ++const char init = ~0; ++ ++struct A ++{ ++ char a; ++ char b; ++ char c; ++ char d; ++} __attribute__ ((packed)) A; ++ ++/* The fetch_op routines return the original value before the operation. 
*/ ++ ++void ++test_fetch_add (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) ++ abort (); ++} ++ ++ ++void ++test_fetch_sub (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) ++ abort (); ++} ++ ++void ++test_fetch_and (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_fetch_nand (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if 
(__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_xor (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_or (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) ++ abort (); ++} ++ ++/* The OP_fetch routines return the new value after the operation. 
*/ ++ ++void ++test_add_fetch (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) ++ abort (); ++} ++ ++ ++void ++test_sub_fetch (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) ++ abort (); ++} ++ ++void ++test_and_fetch (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ *v = init; ++ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_nand_fetch (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 
0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor_fetch (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_or_fetch (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) ++ abort (); ++} ++ ++ ++/* Test the OP routines with a result which isn't used. Use both variations ++ within each function. 
*/ ++ ++void ++test_add (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); ++ if (*v != 2) ++ abort (); ++ ++ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); ++ if (*v != 3) ++ abort (); ++ ++ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); ++ if (*v != 4) ++ abort (); ++ ++ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 5) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 6) ++ abort (); ++} ++ ++ ++void ++test_sub (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); ++ if (*v != --res) ++ abort (); ++} ++ ++void ++test_and (char* v) ++{ ++ *v = init; ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); ++ if (*v != 0) ++ abort (); ++ ++ *v = init; ++ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_nand (char* v) ++{ ++ *v = init; ++ ++ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_or (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); ++ if (*v != 3) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); ++ if (*v != 7) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); ++ if (*v != 15) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 31) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 63) ++ abort (); ++} ++ ++int ++main () ++{ ++ char* V = {&A.a, &A.b, &A.c, &A.d}; ++ ++ for (int i = 0; i < 4; i++) { ++ test_fetch_add (Vi); ++ test_fetch_sub (Vi); ++ test_fetch_and (Vi); ++ test_fetch_nand (Vi); ++ test_fetch_xor (Vi); ++ test_fetch_or (Vi); ++ ++ test_add_fetch (Vi); ++ test_sub_fetch (Vi); ++ test_and_fetch (Vi); ++ test_nand_fetch (Vi); ++ test_xor_fetch (Vi); ++ test_or_fetch (Vi); ++ ++ test_add (Vi); ++ test_sub (Vi); ++ test_and (Vi); ++ 
test_nand (Vi); ++ test_xor (Vi); ++ test_or (Vi); ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c +new file mode 100644 +index 000000000..eecfaae5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c +@@ -0,0 +1,566 @@ ++/* Check all short alignments. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ ++ ++/* Test the execution of the __atomic_*OP builtin routines for a short. */ ++ ++extern void abort(void); ++ ++short count, res; ++const short init = ~0; ++ ++struct A ++{ ++ short a; ++ short b; ++} __attribute__ ((packed)) A; ++ ++/* The fetch_op routines return the original value before the operation. */ ++ ++void ++test_fetch_add (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) ++ abort (); ++} ++ ++ ++void ++test_fetch_sub (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) ++ abort (); ++ ++ if 
(__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) ++ abort (); ++} ++ ++void ++test_fetch_and (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_fetch_nand (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_xor (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_or (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ count 
*= 2; ++ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) ++ abort (); ++} ++ ++/* The OP_fetch routines return the new value after the operation. */ ++ ++void ++test_add_fetch (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) ++ abort (); ++} ++ ++ ++void ++test_sub_fetch (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) ++ abort (); ++} ++ ++void ++test_and_fetch (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ *v = init; ++ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void 
++test_nand_fetch (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor_fetch (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_or_fetch (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) ++ abort (); ++} ++ ++ ++/* Test the OP routines with a result which isn't used. Use both variations ++ within each function. 
*/ ++ ++void ++test_add (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); ++ if (*v != 2) ++ abort (); ++ ++ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); ++ if (*v != 3) ++ abort (); ++ ++ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); ++ if (*v != 4) ++ abort (); ++ ++ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 5) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 6) ++ abort (); ++} ++ ++ ++void ++test_sub (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); ++ if (*v != --res) ++ abort (); ++} ++ ++void ++test_and (short* v) ++{ ++ *v = init; ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); ++ if (*v != 0) ++ abort (); ++ ++ *v = init; ++ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_nand (short* v) ++{ ++ *v = init; ++ ++ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_or (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); ++ if (*v != 3) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); ++ if (*v != 7) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); ++ if (*v != 15) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 31) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 63) ++ abort (); ++} ++ ++int ++main () { ++ short* V = {&A.a, &A.b}; ++ ++ for (int i = 0; i < 2; i++) { ++ test_fetch_add (Vi); ++ test_fetch_sub (Vi); ++ test_fetch_and (Vi); ++ test_fetch_nand (Vi); ++ test_fetch_xor (Vi); ++ test_fetch_or (Vi); ++ ++ test_add_fetch (Vi); ++ test_sub_fetch (Vi); ++ test_and_fetch (Vi); ++ test_nand_fetch (Vi); ++ test_xor_fetch (Vi); ++ test_or_fetch (Vi); ++ ++ test_add (Vi); ++ test_sub (Vi); ++ test_and (Vi); ++ test_nand 
(Vi); ++ test_xor (Vi); ++ test_or (Vi); ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c +new file mode 100644 +index 000000000..52093894a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c +@@ -0,0 +1,87 @@ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_compare_exchange_n builtin for a char. */ ++ ++extern void abort(void); ++ ++char v = 0; ++char expected = 0; ++char max = ~0; ++char desired = ~0; ++char zero = 0; ++ ++#define STRONG 0 ++#define WEAK 1 ++ ++int ++main () ++{ ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ /* Now test the generic version. 
*/ ++ ++ v = 0; ++ ++ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c +new file mode 100644 +index 000000000..8fee8c448 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c +@@ -0,0 +1,87 @@ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_compare_exchange_n builtin for a short. 
*/ ++ ++extern void abort(void); ++ ++short v = 0; ++short expected = 0; ++short max = ~0; ++short desired = ~0; ++short zero = 0; ++ ++#define STRONG 0 ++#define WEAK 1 ++ ++int ++main () ++{ ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ /* Now test the generic version. 
*/ ++ ++ v = 0; ++ ++ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c +new file mode 100644 +index 000000000..24c344c0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c +@@ -0,0 +1,69 @@ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_exchange_n builtin for a char. 
*/ ++ ++extern void abort(void); ++ ++char v, count, ret; ++ ++int ++main () ++{ ++ v = 0; ++ count = 0; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) ++ abort (); ++ count++; ++ ++ /* Now test the generic version. */ ++ ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c +new file mode 100644 +index 000000000..edc212df0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c +@@ -0,0 +1,69 @@ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_X builtin for a short. 
*/ ++ ++extern void abort(void); ++ ++short v, count, ret; ++ ++int ++main () ++{ ++ v = 0; ++ count = 0; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) ++ abort (); ++ count++; ++ ++ /* Now test the generic version. */ ++ ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ return 0; ++} +diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c +index 7007e7a20..a29909b97 100644 +--- a/libgcc/config/riscv/atomic.c ++++ b/libgcc/config/riscv/atomic.c +@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define INVERT "not %tmp1, %tmp1\n\t" + #define DONT_INVERT "" + ++/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */ ++ + #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop) \ + type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v) \ + { \ +-- +2.33.0 +
View file
_service:tar_scm:0002-libquadmath-Enable-libquadmath-on-kunpeng.patch
Deleted
@@ -1,197 +0,0 @@ -From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001 -From: eastb233 <xiezhiheng@huawei.com> -Date: Fri, 14 Jul 2023 11:10:24 +0800 -Subject: PATCH 2/2 libquadmath Enable libquadmath on kunpeng - -This enable libquadmath on kunpeng platform to convenient -users that migrating from x86 platform. libquadmath uses "__float128" -as quad precision floating point type and with math functions with "q" -suffix like "cosq". For those who do not need to adapt to x86 platform, -you can use "long double" as quad precision floating point type and math -functions with "l" suffix like "cosl" in libm for quad precision math. ---- - libquadmath/Makefile.am | 4 ++++ - libquadmath/Makefile.in | 3 ++- - libquadmath/configure | 28 ++++++++++++++++++++++++++-- - libquadmath/configure.ac | 7 +++++++ - libquadmath/quadmath.h | 13 +++++++++++-- - 5 files changed, 50 insertions(+), 5 deletions(-) - -diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am -index 35dffb46f..bf0398d9c 100644 ---- a/libquadmath/Makefile.am -+++ b/libquadmath/Makefile.am -@@ -2,6 +2,10 @@ - - AUTOMAKE_OPTIONS = foreign info-in-builddir - -+if ARCH_AARCH64 -+DEFS += -D__float128="long double" -+endif -+ - ## Skip over everything if the quadlib is not available: - if BUILD_LIBQUADMATH - ACLOCAL_AMFLAGS = -I .. -I ../config -diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in -index 8c0112122..449cc8a06 100644 ---- a/libquadmath/Makefile.in -+++ b/libquadmath/Makefile.in -@@ -90,6 +90,7 @@ POST_UNINSTALL = : - build_triplet = @build@ - host_triplet = @host@ - target_triplet = @target@ -+@ARCH_AARCH64_TRUE@am__append_1 = -D__float128="long double" - @BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = - subdir = . 
- ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -@@ -337,7 +338,7 @@ CFLAGS = @CFLAGS@ - CPP = @CPP@ - CPPFLAGS = @CPPFLAGS@ - CYGPATH_W = @CYGPATH_W@ --DEFS = @DEFS@ -+DEFS = @DEFS@ $(am__append_1) - DEPDIR = @DEPDIR@ - DSYMUTIL = @DSYMUTIL@ - DUMPBIN = @DUMPBIN@ -diff --git a/libquadmath/configure b/libquadmath/configure -index 603f2f131..13a9088fb 100755 ---- a/libquadmath/configure -+++ b/libquadmath/configure -@@ -633,6 +633,8 @@ am__EXEEXT_TRUE - LTLIBOBJS - LIBOBJS - get_gcc_base_ver -+ARCH_AARCH64_FALSE -+ARCH_AARCH64_TRUE - GENINSRC_FALSE - GENINSRC_TRUE - XCFLAGS -@@ -10806,7 +10808,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 10809 "configure" -+#line 10811 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -10912,7 +10914,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 10915 "configure" -+#line 10917 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -12705,6 +12707,11 @@ else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext - /* end confdefs.h. */ - -+ #if defined(__aarch64__) -+ typedef long double __float128; -+ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) -+ #endif -+ - #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) - typedef _Complex float __attribute__((mode(TC))) __complex128; - #else -@@ -12756,6 +12763,11 @@ fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext - /* end confdefs.h. 
*/ - -+ #if defined(__aarch64__) -+ typedef long double __float128; -+ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) -+ #endif -+ - #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) - typedef _Complex float __attribute__((mode(TC))) __complex128; - #else -@@ -13242,6 +13254,14 @@ else - GENINSRC_FALSE= - fi - -+ if expr "$target_cpu" : "aarch64.*" > /dev/null; then -+ ARCH_AARCH64_TRUE= -+ ARCH_AARCH64_FALSE='#' -+else -+ ARCH_AARCH64_TRUE='#' -+ ARCH_AARCH64_FALSE= -+fi -+ - - # Determine what GCC version number to use in filesystem paths. - -@@ -13425,6 +13445,10 @@ if test -z "${GENINSRC_TRUE}" && test -z "${GENINSRC_FALSE}"; then - as_fn_error $? "conditional \"GENINSRC\" was never defined. - Usually this means the macro was only invoked conditionally." "$LINENO" 5 - fi -+if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then -+ as_fn_error $? "conditional \"ARCH_AARCH64\" was never defined. -+Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 -+fi - - : "${CONFIG_STATUS=./config.status}" - ac_write_fail=0 -diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac -index eec4084a4..507c247d6 100644 ---- a/libquadmath/configure.ac -+++ b/libquadmath/configure.ac -@@ -218,6 +218,11 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, test "x$quadmath_use_symver" = xsun) - - AC_CACHE_CHECK(whether __float128 is supported, libquad_cv_have_float128, - GCC_TRY_COMPILE_OR_LINK( -+ #if defined(__aarch64__) -+ typedef long double __float128; -+ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) -+ #endif -+ - #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) - typedef _Complex float __attribute__((mode(TC))) __complex128; - #else -@@ -393,6 +398,8 @@ AS_HELP_STRING(--enable-generated-files-in-srcdir, - enable_generated_files_in_srcdir=no) - AC_MSG_RESULT($enable_generated_files_in_srcdir) - AM_CONDITIONAL(GENINSRC, test "$enable_generated_files_in_srcdir" = yes) -+AM_CONDITIONAL(ARCH_AARCH64, -+ expr "$target_cpu" : "aarch64.*" > /dev/null) - - # Determine what GCC version number to use in filesystem paths. - GCC_BASE_VER -diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h -index 81eb957d2..bb1b49df6 100644 ---- a/libquadmath/quadmath.h -+++ b/libquadmath/quadmath.h -@@ -27,6 +27,12 @@ Boston, MA 02110-1301, USA. 
*/ - extern "C" { - #endif - -+#if defined(__aarch64__) -+#ifndef __float128 -+typedef long double __float128; -+#endif -+#endif -+ - /* Define the complex type corresponding to __float128 - ("_Complex __float128" is not allowed) */ - #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) -@@ -160,10 +166,13 @@ extern int quadmath_snprintf (char *str, size_t size, - #define FLT128_MAX_10_EXP 4932 - - --#define HUGE_VALQ __builtin_huge_valq() -+#if defined(__aarch64__) - /* The following alternative is valid, but brings the warning: - (floating constant exceeds range of ‘__float128’) */ --/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ -+# define HUGE_VALQ (__extension__ 0x1.0p32767Q) -+#else -+# define HUGE_VALQ __builtin_huge_valq() -+#endif - - #define M_Eq 2.718281828459045235360287471352662498Q /* e */ - #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ --- -2.33.0 -
View file
_service:tar_scm:0003-CONFIG-Regenerate-configure-file.patch
Added
@@ -0,0 +1,45 @@ +From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Fri, 14 Jul 2023 11:07:05 +0800 +Subject: PATCH 03/22 CONFIG Regenerate configure file + +Regenerate configure file under libquadmath directory +since it is out of date. +--- + libquadmath/configure | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/libquadmath/configure b/libquadmath/configure +index b3ee64f9c..603f2f131 100755 +--- a/libquadmath/configure ++++ b/libquadmath/configure +@@ -10806,7 +10806,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 10819 "configure" ++#line 10809 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -10912,7 +10912,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 10925 "configure" ++#line 10915 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -13031,7 +13031,7 @@ case "$host" in + case "$enable_cet" in + auto) + # Check if target supports multi-byte NOPs +- # and if assembler supports CET insn. ++ # and if compiler and assembler support CET insn. + cet_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fcf-protection" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +-- +2.33.0 +
View file
_service:tar_scm:0004-libquadmath-Enable-libquadmath-on-kunpeng.patch
Added
@@ -0,0 +1,197 @@ +From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Fri, 14 Jul 2023 11:10:24 +0800 +Subject: PATCH 04/22 libquadmath Enable libquadmath on kunpeng + +This enable libquadmath on kunpeng platform to convenient +users that migrating from x86 platform. libquadmath uses "__float128" +as quad precision floating point type and with math functions with "q" +suffix like "cosq". For those who do not need to adapt to x86 platform, +you can use "long double" as quad precision floating point type and math +functions with "l" suffix like "cosl" in libm for quad precision math. +--- + libquadmath/Makefile.am | 4 ++++ + libquadmath/Makefile.in | 3 ++- + libquadmath/configure | 28 ++++++++++++++++++++++++++-- + libquadmath/configure.ac | 7 +++++++ + libquadmath/quadmath.h | 13 +++++++++++-- + 5 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am +index 35dffb46f..bf0398d9c 100644 +--- a/libquadmath/Makefile.am ++++ b/libquadmath/Makefile.am +@@ -2,6 +2,10 @@ + + AUTOMAKE_OPTIONS = foreign info-in-builddir + ++if ARCH_AARCH64 ++DEFS += -D__float128="long double" ++endif ++ + ## Skip over everything if the quadlib is not available: + if BUILD_LIBQUADMATH + ACLOCAL_AMFLAGS = -I .. -I ../config +diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in +index 8c0112122..449cc8a06 100644 +--- a/libquadmath/Makefile.in ++++ b/libquadmath/Makefile.in +@@ -90,6 +90,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ ++@ARCH_AARCH64_TRUE@am__append_1 = -D__float128="long double" + @BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = + subdir = . 
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +@@ -337,7 +338,7 @@ CFLAGS = @CFLAGS@ + CPP = @CPP@ + CPPFLAGS = @CPPFLAGS@ + CYGPATH_W = @CYGPATH_W@ +-DEFS = @DEFS@ ++DEFS = @DEFS@ $(am__append_1) + DEPDIR = @DEPDIR@ + DSYMUTIL = @DSYMUTIL@ + DUMPBIN = @DUMPBIN@ +diff --git a/libquadmath/configure b/libquadmath/configure +index 603f2f131..13a9088fb 100755 +--- a/libquadmath/configure ++++ b/libquadmath/configure +@@ -633,6 +633,8 @@ am__EXEEXT_TRUE + LTLIBOBJS + LIBOBJS + get_gcc_base_ver ++ARCH_AARCH64_FALSE ++ARCH_AARCH64_TRUE + GENINSRC_FALSE + GENINSRC_TRUE + XCFLAGS +@@ -10806,7 +10808,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 10809 "configure" ++#line 10811 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -10912,7 +10914,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 10915 "configure" ++#line 10917 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -12705,6 +12707,11 @@ else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext + /* end confdefs.h. */ + ++ #if defined(__aarch64__) ++ typedef long double __float128; ++ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) ++ #endif ++ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) + typedef _Complex float __attribute__((mode(TC))) __complex128; + #else +@@ -12756,6 +12763,11 @@ fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext + /* end confdefs.h. 
*/ + ++ #if defined(__aarch64__) ++ typedef long double __float128; ++ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) ++ #endif ++ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) + typedef _Complex float __attribute__((mode(TC))) __complex128; + #else +@@ -13242,6 +13254,14 @@ else + GENINSRC_FALSE= + fi + ++ if expr "$target_cpu" : "aarch64.*" > /dev/null; then ++ ARCH_AARCH64_TRUE= ++ ARCH_AARCH64_FALSE='#' ++else ++ ARCH_AARCH64_TRUE='#' ++ ARCH_AARCH64_FALSE= ++fi ++ + + # Determine what GCC version number to use in filesystem paths. + +@@ -13425,6 +13445,10 @@ if test -z "${GENINSRC_TRUE}" && test -z "${GENINSRC_FALSE}"; then + as_fn_error $? "conditional \"GENINSRC\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then ++ as_fn_error $? "conditional \"ARCH_AARCH64\" was never defined. ++Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 ++fi + + : "${CONFIG_STATUS=./config.status}" + ac_write_fail=0 +diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac +index eec4084a4..507c247d6 100644 +--- a/libquadmath/configure.ac ++++ b/libquadmath/configure.ac +@@ -218,6 +218,11 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, test "x$quadmath_use_symver" = xsun) + + AC_CACHE_CHECK(whether __float128 is supported, libquad_cv_have_float128, + GCC_TRY_COMPILE_OR_LINK( ++ #if defined(__aarch64__) ++ typedef long double __float128; ++ #define __builtin_huge_valq() (__extension__ 0x1.0p32767Q) ++ #endif ++ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) + typedef _Complex float __attribute__((mode(TC))) __complex128; + #else +@@ -393,6 +398,8 @@ AS_HELP_STRING(--enable-generated-files-in-srcdir, + enable_generated_files_in_srcdir=no) + AC_MSG_RESULT($enable_generated_files_in_srcdir) + AM_CONDITIONAL(GENINSRC, test "$enable_generated_files_in_srcdir" = yes) ++AM_CONDITIONAL(ARCH_AARCH64, ++ expr "$target_cpu" : "aarch64.*" > /dev/null) + + # Determine what GCC version number to use in filesystem paths. + GCC_BASE_VER +diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h +index 81eb957d2..bb1b49df6 100644 +--- a/libquadmath/quadmath.h ++++ b/libquadmath/quadmath.h +@@ -27,6 +27,12 @@ Boston, MA 02110-1301, USA. 
*/ + extern "C" { + #endif + ++#if defined(__aarch64__) ++#ifndef __float128 ++typedef long double __float128; ++#endif ++#endif ++ + /* Define the complex type corresponding to __float128 + ("_Complex __float128" is not allowed) */ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) +@@ -160,10 +166,13 @@ extern int quadmath_snprintf (char *str, size_t size, + #define FLT128_MAX_10_EXP 4932 + + +-#define HUGE_VALQ __builtin_huge_valq() ++#if defined(__aarch64__) + /* The following alternative is valid, but brings the warning: + (floating constant exceeds range of ‘__float128’) */ +-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ ++# define HUGE_VALQ (__extension__ 0x1.0p32767Q) ++#else ++# define HUGE_VALQ __builtin_huge_valq() ++#endif + + #define M_Eq 2.718281828459045235360287471352662498Q /* e */ + #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ +-- +2.33.0 +
View file
_service:tar_scm:0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch
Added
@@ -0,0 +1,89 @@ +From e7013d2640d82e928ebdaf830b6833051ac65296 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Sat, 5 Nov 2022 13:22:33 +0800 +Subject: PATCH 06/22 MULL64 1/3 Add A ? B op CST : B match and simplify + optimizations + + Refer to commit b6bdd7a4, use pattern match to simple + A ? B op CST : B (where CST is power of 2) simplifications. + Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue. + + gcc/ + * match.pd (A ? B op CST : B): Add simplifcations for A ? B op POW2 : B + + gcc/testsuite/ + * gcc.dg/pr107190.c: New test. +--- + gcc/match.pd | 21 +++++++++++++++++++++ + gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/pr107190.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index fc2833bbd..fd0857fc9 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4280,6 +4280,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++(if (canonicalize_math_p ()) ++/* These patterns are mostly used by PHIOPT to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */ ++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) ++ (simplify ++ (cond @0 (op:s @1 integer_pow2p@2) @1) ++ /* powerof2cst */ ++ (if (INTEGRAL_TYPE_P (type)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. 
*/ + /* This pattern implements two kinds simplification: +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +new file mode 100644 +index 000000000..235b2761a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) ++ ++unsigned int test_m(unsigned long in0, unsigned long in1) { ++ unsigned long m, m1, lt, ht, bl, bh; ++ lt = LBITS(in0); ++ ht = HBITS(in0); ++ bl = LBITS(in1); ++ bh = HBITS(in1); ++ m = bh * lt; ++ m1 = bl * ht; ++ ht = bh * ht; ++ m = (m + m1) & BN_MASK2; ++ if (m < m1) ht += L2HBITS((unsigned long)1); ++ return ht + m; ++} ++ ++/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */ +-- +2.33.0 +
View file
_service:tar_scm:0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch
Added
@@ -0,0 +1,130 @@ +From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: PATCH 07/22 MULL64 2/3 Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifcations for low part of mul + * common.opt: Add new option fmerge-mull enable with -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test. +--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.cc | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc52..e365a48bc 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2126,6 +2126,10 @@ fmerge-debug-strings + Common Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping. +diff --git a/gcc/match.pd b/gcc/match.pd +index fd0857fc9..2092e6959 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into more ++ simple IR. 
The following scenario should be matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.cc b/gcc/opts.cc +index a97630d1c..eae71ed20 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -647,6 +647,7 @@ static const struct default_options default_options_table = + VECT_COST_MODEL_VERY_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) ++ ++void mul64(unsigned long in0, unsigned long in1, ++ unsigned long &retLo, unsigned long &retHi) { ++ unsigned long m00, m01, m10, m11, al, ah, bl, bh; ++ unsigned long Addc, addc32, low; ++ al = LBITS(in0); ++ ah = HBITS(in0); ++ bl = LBITS(in1); ++ bh = HBITS(in1); ++ m10 = bh * al; ++ m00 = bl * al; ++ m01 = bl * ah; ++ m11 = bh * ah; ++ Addc = (m10 + m01) & BN_MASK2; ++ if (Addc < m01) m11 += L2HBITS((unsigned long)1); ++ m11 += HBITS(Addc); ++ addc32 = L2HBITS(Addc); ++ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; ++ retLo = low; ++ retHi = m11; ++} ++ ++/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ +-- +2.33.0 +
View file
_service:tar_scm:0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
Added
@@ -0,0 +1,105 @@ +From 4e536dbb4a08925cea259be13962969efcc0f3c1 Mon Sep 17 00:00:00 2001 +From: zhongyunde <zhongyunde@huawei.com> +Date: Fri, 11 Nov 2022 11:30:37 +0800 +Subject: PATCH 08/22 MULL64 3/3 Fold series of instructions into umulh + + Merge the high part of series instructions into umulh + + gcc/ + * match.pd: Add simplifcations for high part of umulh + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4 +--- + gcc/match.pd | 56 ++++++++++++++++++++++++++ + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 2092e6959..b7e3588e8 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP4 to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* Mul64 is defined as a multiplication algorithm which compute two 64-bit ++ integers to one 128-bit integer. Try to match the high part of mul pattern ++ after the low part of mul pattern is simplified. 
The following scenario ++ should be matched: ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2 ++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3 ++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2 ++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3 ++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6 ++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8 ++ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7)) ++ addc32 = Addc << 32; -- lshift@10 @9 @3 ++ ResLo = In0(D) * In1(D); -- mult @0 @1 ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH; ++ } */ ++(simplify ++ (plus:c ++ (plus:c ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult:c @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) ++ @3)) ++ (plus:c@11 ++ (rshift ++ (plus:c@9 ++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) ++ @3) ++ (mult:c (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)) ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) ++ (convert:i128_type @1)) ++ { shift; }))) ++ ) ++) ++#endif ++ + #if GIMPLE + /* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index 2a3b74604..f61cf5e6f 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1, + retHi = m11; + } + +-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */ +-- +2.33.0 +
View file
_service:tar_scm:0008-RISCV-Inline-subword-atomic-ops.patch
Deleted
@@ -1,2057 +0,0 @@ -From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001 -From: Patrick O'Neill <patrick@rivosinc.com> -Date: Tue, 18 Apr 2023 14:33:13 -0700 -Subject: PATCH RISCV: Inline subword atomic ops - -RISC-V has no support for subword atomic operations; code currently -generates libatomic library calls. - -This patch changes the default behavior to inline subword atomic calls -(using the same logic as the existing library call). -Behavior can be specified using the -minline-atomics and --mno-inline-atomics command line flags. - -gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm. -This will need to stay for backwards compatibility and the --mno-inline-atomics flag. - -2023-04-18 Patrick O'Neill <patrick@rivosinc.com> - -gcc/ChangeLog: - PR target/104338 - * config/riscv/riscv-protos.h: Add helper function stubs. - * config/riscv/riscv.cc: Add helper functions for subword masking. - * config/riscv/riscv.opt: Add command-line flag. - * config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op, - fetch_and_nand, CAS, and exchange ops. - * doc/invoke.texi: Add blurb regarding command-line flag. - -libgcc/ChangeLog: - PR target/104338 - * config/riscv/atomic.c: Add reference to duplicate logic. - -gcc/testsuite/ChangeLog: - PR target/104338 - * gcc.target/riscv/inline-atomics-1.c: New test. - * gcc.target/riscv/inline-atomics-2.c: New test. - * gcc.target/riscv/inline-atomics-3.c: New test. - * gcc.target/riscv/inline-atomics-4.c: New test. - * gcc.target/riscv/inline-atomics-5.c: New test. - * gcc.target/riscv/inline-atomics-6.c: New test. - * gcc.target/riscv/inline-atomics-7.c: New test. - * gcc.target/riscv/inline-atomics-8.c: New test. 
- -Signed-off-by: Patrick O'Neill <patrick@rivosinc.com> -Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> ---- - gcc/config/riscv/riscv-protos.h | 2 + - gcc/config/riscv/riscv.cc | 49 ++ - gcc/config/riscv/riscv.opt | 4 + - gcc/config/riscv/sync.md | 301 +++++++++ - gcc/doc/invoke.texi | 10 +- - .../gcc.target/riscv/inline-atomics-1.c | 18 + - .../gcc.target/riscv/inline-atomics-2.c | 9 + - .../gcc.target/riscv/inline-atomics-3.c | 569 ++++++++++++++++++ - .../gcc.target/riscv/inline-atomics-4.c | 566 +++++++++++++++++ - .../gcc.target/riscv/inline-atomics-5.c | 87 +++ - .../gcc.target/riscv/inline-atomics-6.c | 87 +++ - .../gcc.target/riscv/inline-atomics-7.c | 69 +++ - .../gcc.target/riscv/inline-atomics-8.c | 69 +++ - libgcc/config/riscv/atomic.c | 2 + - 14 files changed, 1841 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c - create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c - -diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h -index 607ff6ea697..f87661bde2c 100644 ---- a/gcc/config/riscv/riscv-protos.h -+++ b/gcc/config/riscv/riscv-protos.h -@@ -74,6 +74,8 @@ - extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); - extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); - extern bool riscv_gpr_save_operation_p (rtx); -+extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *); -+extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *); - - /* Routines implemented in riscv-c.cc. 
*/ - void riscv_cpu_cpp_builtins (cpp_reader *); -diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc -index a2d2dd0bb67..0f890469d7a 100644 ---- a/gcc/config/riscv/riscv.cc -+++ b/gcc/config/riscv/riscv.cc -@@ -5605,6 +5605,55 @@ - return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0; - } - -+/* Given memory reference MEM, expand code to compute the aligned -+ memory address, shift and mask values and store them into -+ *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */ -+ -+void -+riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, -+ rtx *not_mask) -+{ -+ /* Align the memory address to a word. */ -+ rtx addr = force_reg (Pmode, XEXP (mem, 0)); -+ -+ rtx addr_mask = gen_int_mode (-4, Pmode); -+ -+ rtx aligned_addr = gen_reg_rtx (Pmode); -+ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask)); -+ -+ *aligned_mem = change_address (mem, SImode, aligned_addr); -+ -+ /* Calculate the shift amount. */ -+ emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), -+ gen_int_mode (3, SImode))); -+ emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift, -+ gen_int_mode (3, SImode))); -+ -+ /* Calculate the mask. */ -+ int unshifted_mask = GET_MODE_MASK (GET_MODE (mem)); -+ -+ emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode)); -+ -+ emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask, -+ gen_lowpart (QImode, *shift))); -+ -+ emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask)); -+} -+ -+/* Leftshift a subword within an SImode register. */ -+ -+void -+riscv_lshift_subword (machine_mode mode, rtx value, rtx shift, -+ rtx *shifted_value) -+{ -+ rtx value_reg = gen_reg_rtx (SImode); -+ emit_move_insn (value_reg, simplify_gen_subreg (SImode, value, -+ mode, 0)); -+ -+ emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg, -+ gen_lowpart (QImode, shift))); -+} -+ - /* Initialize the GCC target structure. 
*/ - #undef TARGET_ASM_ALIGNED_HI_OP - #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" -diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt -index ef1bdfcfe28..63d4710cb15 100644 ---- a/gcc/config/riscv/riscv.opt -+++ b/gcc/config/riscv/riscv.opt -@@ -225,3 +225,7 @@ - misa-spec= - Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC) - Set the version of RISC-V ISA spec. -+ -+minline-atomics -+Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1) -+Always inline subword atomic operations. -diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md -index c932ef87b9d..83be6431cb6 100644 ---- a/gcc/config/riscv/sync.md -+++ b/gcc/config/riscv/sync.md -@@ -21,8 +21,11 @@ - - (define_c_enum "unspec" - UNSPEC_COMPARE_AND_SWAP -+ UNSPEC_COMPARE_AND_SWAP_SUBWORD - UNSPEC_SYNC_OLD_OP -+ UNSPEC_SYNC_OLD_OP_SUBWORD - UNSPEC_SYNC_EXCHANGE -+ UNSPEC_SYNC_EXCHANGE_SUBWORD - UNSPEC_ATOMIC_STORE - UNSPEC_MEMORY_BARRIER - ) -@@ -92,6 +95,135 @@ - "%F3amo<insn>.<amo>%A3 %0,%z2,%1" - (set (attr "length") (const_int 8))) - -+(define_insn "subword_atomic_fetch_strong_<atomic_optab>" -+ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem -+ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location -+ (set (match_dup 1) -+ (unspec_volatile:SI -+ (any_atomic:SI (match_dup 1) -+ (match_operand:SI 2 "register_operand" "rI")) ;; value for op -+ (match_operand:SI 3 "register_operand" "rI") ;; mask -+ UNSPEC_SYNC_OLD_OP_SUBWORD)) -+ (match_operand:SI 4 "register_operand" "rI") ;; not_mask -+ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 -+ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_2 -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+ { -+ return "1:\;" -+ "lr.w.aq\t%0, %1\;" -+ "<insn>\t%5, %0, %2\;" -+ "and\t%5, %5, %3\;" -+ "and\t%6, %0, %4\;" -+ "or\t%6, %6, %5\;" -+ "sc.w.rl\t%5, %6, %1\;" -+ "bnez\t%5, 1b"; -+ } -+ (set (attr "length") (const_int 28))) -+ -+(define_expand "atomic_fetch_nand<mode>" -+ 
(match_operand:SHORT 0 "register_operand") ;; old value at mem -+ (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location -+ (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op -+ (match_operand:SI 3 "const_int_operand") ;; model -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+{ -+ /* We have no QImode/HImode atomics, so form a mask, then use -+ subword_atomic_fetch_strong_nand to implement a LR/SC version of the -+ operation. */ -+ -+ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining -+ is disabled */ -+ -+ rtx old = gen_reg_rtx (SImode); -+ rtx mem = operands1; -+ rtx value = operands2; -+ rtx aligned_mem = gen_reg_rtx (SImode); -+ rtx shift = gen_reg_rtx (SImode); -+ rtx mask = gen_reg_rtx (SImode); -+ rtx not_mask = gen_reg_rtx (SImode); -+ -+ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); -+ -+ rtx shifted_value = gen_reg_rtx (SImode); -+ riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); -+ -+ emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem, -+ shifted_value, -+ mask, not_mask)); -+ -+ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, -+ gen_lowpart (QImode, shift))); -+ -+ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); -+ -+ DONE; -+}) -+ -+(define_insn "subword_atomic_fetch_strong_nand" -+ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem -+ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location -+ (set (match_dup 1) -+ (unspec_volatile:SI -+ (not:SI (and:SI (match_dup 1) -+ (match_operand:SI 2 "register_operand" "rI"))) ;; value for op -+ (match_operand:SI 3 "register_operand" "rI") ;; mask -+ UNSPEC_SYNC_OLD_OP_SUBWORD)) -+ (match_operand:SI 4 "register_operand" "rI") ;; not_mask -+ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 -+ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_2 -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+ { -+ return "1:\;" -+ "lr.w.aq\t%0, %1\;" -+ "and\t%5, %0, %2\;" -+ 
"not\t%5, %5\;" -+ "and\t%5, %5, %3\;" -+ "and\t%6, %0, %4\;" -+ "or\t%6, %6, %5\;" -+ "sc.w.rl\t%5, %6, %1\;" -+ "bnez\t%5, 1b"; -+ } -+ (set (attr "length") (const_int 32))) -+ -+(define_expand "atomic_fetch_<atomic_optab><mode>" -+ (match_operand:SHORT 0 "register_operand") ;; old value at mem -+ (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location -+ (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op -+ (match_operand:SI 3 "const_int_operand") ;; model -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+{ -+ /* We have no QImode/HImode atomics, so form a mask, then use -+ subword_atomic_fetch_strong_<mode> to implement a LR/SC version of the -+ operation. */ -+ -+ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining -+ is disabled */ -+ -+ rtx old = gen_reg_rtx (SImode); -+ rtx mem = operands1; -+ rtx value = operands2; -+ rtx aligned_mem = gen_reg_rtx (SImode); -+ rtx shift = gen_reg_rtx (SImode); -+ rtx mask = gen_reg_rtx (SImode); -+ rtx not_mask = gen_reg_rtx (SImode); -+ -+ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); -+ -+ rtx shifted_value = gen_reg_rtx (SImode); -+ riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); -+ -+ emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem, -+ shifted_value, -+ mask, not_mask)); -+ -+ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, -+ gen_lowpart (QImode, shift))); -+ -+ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); -+ -+ DONE; -+}) -+ - (define_insn "atomic_exchange<mode>" - (set (match_operand:GPR 0 "register_operand" "=&r") - (unspec_volatile:GPR -@@ -104,6 +236,56 @@ - "%F3amoswap.<amo>%A3 %0,%z2,%1" - (set (attr "length") (const_int 8))) - -+(define_expand "atomic_exchange<mode>" -+ (match_operand:SHORT 0 "register_operand") ;; old value at mem -+ (match_operand:SHORT 1 "memory_operand") ;; mem location -+ (match_operand:SHORT 2 "register_operand") ;; value -+ (match_operand:SI 3 
"const_int_operand") ;; model -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+{ -+ rtx old = gen_reg_rtx (SImode); -+ rtx mem = operands1; -+ rtx value = operands2; -+ rtx aligned_mem = gen_reg_rtx (SImode); -+ rtx shift = gen_reg_rtx (SImode); -+ rtx mask = gen_reg_rtx (SImode); -+ rtx not_mask = gen_reg_rtx (SImode); -+ -+ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); -+ -+ rtx shifted_value = gen_reg_rtx (SImode); -+ riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value); -+ -+ emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem, -+ shifted_value, not_mask)); -+ -+ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, -+ gen_lowpart (QImode, shift))); -+ -+ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); -+ DONE; -+}) -+ -+(define_insn "subword_atomic_exchange_strong" -+ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem -+ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location -+ (set (match_dup 1) -+ (unspec_volatile:SI -+ (match_operand:SI 2 "reg_or_0_operand" "rI") ;; value -+ (match_operand:SI 3 "reg_or_0_operand" "rI") ;; not_mask -+ UNSPEC_SYNC_EXCHANGE_SUBWORD)) -+ (clobber (match_scratch:SI 4 "=&r")) ;; tmp_1 -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+ { -+ return "1:\;" -+ "lr.w.aq\t%0, %1\;" -+ "and\t%4, %0, %3\;" -+ "or\t%4, %4, %2\;" -+ "sc.w.rl\t%4, %4, %1\;" -+ "bnez\t%4, 1b"; -+ } -+ (set (attr "length") (const_int 20))) -+ - (define_insn "atomic_cas_value_strong<mode>" - (set (match_operand:GPR 0 "register_operand" "=&r") - (match_operand:GPR 1 "memory_operand" "+A")) -@@ -153,6 +335,125 @@ - DONE; - }) - -+(define_expand "atomic_compare_and_swap<mode>" -+ (match_operand:SI 0 "register_operand") ;; bool output -+ (match_operand:SHORT 1 "register_operand") ;; val output -+ (match_operand:SHORT 2 "memory_operand") ;; memory -+ (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value -+ (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value -+ 
(match_operand:SI 5 "const_int_operand") ;; is_weak -+ (match_operand:SI 6 "const_int_operand") ;; mod_s -+ (match_operand:SI 7 "const_int_operand") ;; mod_f -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+{ -+ emit_insn (gen_atomic_cas_value_strong<mode> (operands1, operands2, -+ operands3, operands4, -+ operands6, operands7)); -+ -+ rtx val = gen_reg_rtx (SImode); -+ if (operands1 != const0_rtx) -+ emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands1)); -+ else -+ emit_move_insn (val, const0_rtx); -+ -+ rtx exp = gen_reg_rtx (SImode); -+ if (operands3 != const0_rtx) -+ emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands3)); -+ else -+ emit_move_insn (exp, const0_rtx); -+ -+ rtx compare = val; -+ if (exp != const0_rtx) -+ { -+ rtx difference = gen_rtx_MINUS (SImode, val, exp); -+ compare = gen_reg_rtx (SImode); -+ emit_move_insn (compare, difference); -+ } -+ -+ if (word_mode != SImode) -+ { -+ rtx reg = gen_reg_rtx (word_mode); -+ emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)); -+ compare = reg; -+ } -+ -+ emit_move_insn (operands0, gen_rtx_EQ (SImode, compare, const0_rtx)); -+ DONE; -+}) -+ -+(define_expand "atomic_cas_value_strong<mode>" -+ (match_operand:SHORT 0 "register_operand") ;; val output -+ (match_operand:SHORT 1 "memory_operand") ;; memory -+ (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value -+ (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value -+ (match_operand:SI 4 "const_int_operand") ;; mod_s -+ (match_operand:SI 5 "const_int_operand") ;; mod_f -+ (match_scratch:SHORT 6) -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+{ -+ /* We have no QImode/HImode atomics, so form a mask, then use -+ subword_atomic_cas_strong<mode> to implement a LR/SC version of the -+ operation. 
*/ -+ -+ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining -+ is disabled */ -+ -+ rtx old = gen_reg_rtx (SImode); -+ rtx mem = operands1; -+ rtx aligned_mem = gen_reg_rtx (SImode); -+ rtx shift = gen_reg_rtx (SImode); -+ rtx mask = gen_reg_rtx (SImode); -+ rtx not_mask = gen_reg_rtx (SImode); -+ -+ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); -+ -+ rtx o = operands2; -+ rtx n = operands3; -+ rtx shifted_o = gen_reg_rtx (SImode); -+ rtx shifted_n = gen_reg_rtx (SImode); -+ -+ riscv_lshift_subword (<MODE>mode, o, shift, &shifted_o); -+ riscv_lshift_subword (<MODE>mode, n, shift, &shifted_n); -+ -+ emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask)); -+ emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask)); -+ -+ emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem, -+ shifted_o, shifted_n, -+ mask, not_mask)); -+ -+ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, -+ gen_lowpart (QImode, shift))); -+ -+ emit_move_insn (operands0, gen_lowpart (<MODE>mode, old)); -+ -+ DONE; -+}) -+ -+(define_insn "subword_atomic_cas_strong" -+ (set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem -+ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location -+ (set (match_dup 1) -+ (unspec_volatile:SI (match_operand:SI 2 "reg_or_0_operand" "rJ") ;; expected value -+ (match_operand:SI 3 "reg_or_0_operand" "rJ") ;; desired value -+ UNSPEC_COMPARE_AND_SWAP_SUBWORD)) -+ (match_operand:SI 4 "register_operand" "rI") ;; mask -+ (match_operand:SI 5 "register_operand" "rI") ;; not_mask -+ (clobber (match_scratch:SI 6 "=&r")) ;; tmp_1 -+ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" -+ { -+ return "1:\;" -+ "lr.w.aq\t%0, %1\;" -+ "and\t%6, %0, %4\;" -+ "bne\t%6, %z2, 1f\;" -+ "and\t%6, %0, %5\;" -+ "or\t%6, %6, %3\;" -+ "sc.w.rl\t%6, %6, %1\;" -+ "bnez\t%6, 1b\;" -+ "1:"; -+ } -+ (set (attr "length") (const_int 28))) -+ - (define_expand "atomic_test_and_set" - (match_operand:QI 0 
"register_operand" "") ;; bool output - (match_operand:QI 1 "memory_operand" "+A") ;; memory -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index e5ee2d536fc..2f40c58b21c 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1227,7 +1227,8 @@ - -malign-data=@var{type} @gol - -mbig-endian -mlittle-endian @gol - -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol ---mstack-protector-guard-offset=@var{offset}} -+-mstack-protector-guard-offset=@var{offset} -+-minline-atomics -mno-inline-atomics} - - @emph{RL78 Options} - @gccoptlist{-msim -mmul=none -mmul=g13 -mmul=g14 -mallregs @gol -@@ -29024,6 +29025,13 @@ Do or don't use smaller but slower prologue and epilogue code that uses - library function calls. The default is to use fast inline prologues and - epilogues. - -+@opindex minline-atomics -+@item -minline-atomics -+@itemx -mno-inline-atomics -+Do or don't use smaller but slower subword atomic emulation code that uses -+libatomic function calls. The default is to use fast inline subword atomics -+that do not require libatomic. 
-+ - @item -mshorten-memrefs - @itemx -mno-shorten-memrefs - @opindex mshorten-memrefs -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c -new file mode 100644 -index 00000000000..5c5623d9b2f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mno-inline-atomics" } */ -+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ -+/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */ -+/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */ -+/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */ -+ -+char foo; -+char bar; -+char baz; -+ -+int -+main () -+{ -+ __sync_fetch_and_add(&foo, 1); -+ __sync_fetch_and_nand(&bar, 1); -+ __sync_bool_compare_and_swap (&baz, 1, 2); -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c -new file mode 100644 -index 00000000000..01b43908692 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* Verify that subword atomics do not generate calls. 
*/ -+/* { dg-options "-minline-atomics" } */ -+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ -+/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */ -+/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */ -+/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */ -+ -+#include "inline-atomics-1.c" -\ No newline at end of file -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c -new file mode 100644 -index 00000000000..709f3734377 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c -@@ -0,0 +1,569 @@ -+/* Check all char alignments. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-1.c */ -+/* Test __atomic routines for existence and proper execution on 1 byte -+ values with each valid memory model. */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ -+ -+/* Test the execution of the __atomic_*OP builtin routines for a char. */ -+ -+extern void abort(void); -+ -+char count, res; -+const char init = ~0; -+ -+struct A -+{ -+ char a; -+ char b; -+ char c; -+ char d; -+} __attribute__ ((packed)) A; -+ -+/* The fetch_op routines return the original value before the operation. 
*/ -+ -+void -+test_fetch_add (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) -+ abort (); -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) -+ abort (); -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) -+ abort (); -+} -+ -+ -+void -+test_fetch_sub (char* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) -+ abort (); -+} -+ -+void -+test_fetch_and (char* v) -+{ -+ *v = init; -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void -+test_fetch_nand (char* v) -+{ -+ *v = init; -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if 
(__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+void -+test_fetch_xor (char* v) -+{ -+ *v = init; -+ count = 0; -+ -+ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+void -+test_fetch_or (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) -+ abort (); -+} -+ -+/* The OP_fetch routines return the new value after the operation. 
*/ -+ -+void -+test_add_fetch (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) -+ abort (); -+ -+ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) -+ abort (); -+ -+ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) -+ abort (); -+} -+ -+ -+void -+test_sub_fetch (char* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) -+ abort (); -+} -+ -+void -+test_and_fetch (char* v) -+{ -+ *v = init; -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ *v = init; -+ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void -+test_nand_fetch (char* v) -+{ -+ *v = init; -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 
0) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+ -+ -+void -+test_xor_fetch (char* v) -+{ -+ *v = init; -+ count = 0; -+ -+ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void -+test_or_fetch (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) -+ abort (); -+} -+ -+ -+/* Test the OP routines with a result which isn't used. Use both variations -+ within each function. 
*/ -+ -+void -+test_add (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != 1) -+ abort (); -+ -+ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); -+ if (*v != 2) -+ abort (); -+ -+ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); -+ if (*v != 3) -+ abort (); -+ -+ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); -+ if (*v != 4) -+ abort (); -+ -+ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); -+ if (*v != 5) -+ abort (); -+ -+ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); -+ if (*v != 6) -+ abort (); -+} -+ -+ -+void -+test_sub (char* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); -+ if (*v != --res) -+ abort (); -+} -+ -+void -+test_and (char* v) -+{ -+ *v = init; -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); -+ if (*v != 0) -+ abort (); -+ -+ *v = init; -+ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); -+ if (*v != init) -+ abort (); -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != 0) -+ abort (); -+ -+ *v = ~*v; -+ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); -+ if (*v != 0) -+ abort (); -+ -+ *v = ~*v; -+ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); -+ if (*v != 0) -+ abort (); -+} -+ -+void -+test_nand (char* v) -+{ -+ *v = init; -+ -+ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); -+ if (*v != 0) -+ abort (); -+ -+ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); -+ if (*v != init) -+ abort (); -+ -+ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); -+ if (*v != init) -+ abort (); -+} -+ -+ -+ -+void -+test_xor (char* v) -+{ -+ *v = init; -+ count = 0; -+ -+ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); -+ if (*v != init) -+ abort (); -+ -+ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); -+ if (*v != 0) -+ abort (); -+} -+ -+void -+test_or (char* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != 1) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); -+ if (*v != 3) -+ abort (); -+ -+ count *= 2; -+ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); -+ if (*v != 7) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); -+ if (*v != 15) -+ abort (); -+ -+ count *= 2; -+ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); -+ if (*v != 31) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); -+ if (*v != 63) -+ abort (); -+} -+ -+int -+main () -+{ -+ char* V = {&A.a, &A.b, &A.c, &A.d}; -+ -+ for (int i = 0; i < 4; i++) { -+ test_fetch_add (Vi); -+ test_fetch_sub (Vi); -+ test_fetch_and (Vi); -+ test_fetch_nand (Vi); -+ test_fetch_xor (Vi); -+ test_fetch_or (Vi); -+ -+ test_add_fetch (Vi); -+ test_sub_fetch (Vi); -+ test_and_fetch (Vi); -+ test_nand_fetch (Vi); -+ test_xor_fetch (Vi); -+ test_or_fetch (Vi); -+ -+ test_add (Vi); -+ test_sub (Vi); -+ test_and (Vi); -+ 
test_nand (Vi); -+ test_xor (Vi); -+ test_or (Vi); -+ } -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c -new file mode 100644 -index 00000000000..eecfaae5cc6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c -@@ -0,0 +1,566 @@ -+/* Check all short alignments. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */ -+/* Test __atomic routines for existence and proper execution on 2 byte -+ values with each valid memory model. */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ -+ -+/* Test the execution of the __atomic_*OP builtin routines for a short. */ -+ -+extern void abort(void); -+ -+short count, res; -+const short init = ~0; -+ -+struct A -+{ -+ short a; -+ short b; -+} __attribute__ ((packed)) A; -+ -+/* The fetch_op routines return the original value before the operation. */ -+ -+void -+test_fetch_add (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) -+ abort (); -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) -+ abort (); -+ -+ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) -+ abort (); -+ -+ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) -+ abort (); -+} -+ -+ -+void -+test_fetch_sub (short* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) -+ abort (); -+ -+ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) -+ abort (); -+ -+ if 
(__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) -+ abort (); -+} -+ -+void -+test_fetch_and (short* v) -+{ -+ *v = init; -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void -+test_fetch_nand (short* v) -+{ -+ *v = init; -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) -+ abort (); -+ -+ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+void -+test_fetch_xor (short* v) -+{ -+ *v = init; -+ count = 0; -+ -+ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+void -+test_fetch_or (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) -+ abort (); -+ -+ count 
*= 2; -+ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) -+ abort (); -+} -+ -+/* The OP_fetch routines return the new value after the operation. */ -+ -+void -+test_add_fetch (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) -+ abort (); -+ -+ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) -+ abort (); -+ -+ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) -+ abort (); -+ -+ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) -+ abort (); -+} -+ -+ -+void -+test_sub_fetch (short* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) -+ abort (); -+ -+ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) -+ abort (); -+} -+ -+void -+test_and_fetch (short* v) -+{ -+ *v = init; -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) -+ abort (); -+ -+ *v = init; -+ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) -+ abort (); -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) -+ abort (); -+ -+ *v = ~*v; -+ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void 
-+test_nand_fetch (short* v) -+{ -+ *v = init; -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 0) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) -+ abort (); -+} -+ -+ -+ -+void -+test_xor_fetch (short* v) -+{ -+ *v = init; -+ count = 0; -+ -+ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) -+ abort (); -+ -+ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) -+ abort (); -+} -+ -+void -+test_or_fetch (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) -+ abort (); -+ -+ count *= 2; -+ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) -+ abort (); -+} -+ -+ -+/* Test the OP routines with a result which isn't used. Use both variations -+ within each function. 
*/ -+ -+void -+test_add (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != 1) -+ abort (); -+ -+ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); -+ if (*v != 2) -+ abort (); -+ -+ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); -+ if (*v != 3) -+ abort (); -+ -+ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); -+ if (*v != 4) -+ abort (); -+ -+ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); -+ if (*v != 5) -+ abort (); -+ -+ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); -+ if (*v != 6) -+ abort (); -+} -+ -+ -+void -+test_sub (short* v) -+{ -+ *v = res = 20; -+ count = 0; -+ -+ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); -+ if (*v != --res) -+ abort (); -+ -+ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); -+ if (*v != --res) -+ abort (); -+} -+ -+void -+test_and (short* v) -+{ -+ *v = init; -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); -+ if (*v != 0) -+ abort (); -+ -+ *v = init; -+ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); -+ if (*v != init) -+ abort (); -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != 0) -+ abort (); -+ -+ *v = ~*v; -+ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); -+ if (*v != 0) -+ abort (); -+ -+ *v = ~*v; -+ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); -+ if (*v != 0) -+ abort (); -+} -+ -+void -+test_nand (short* v) -+{ -+ *v = init; -+ -+ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); -+ if (*v != 0) -+ abort (); -+ -+ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); -+ if (*v != init) -+ abort (); -+ -+ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); -+ if (*v != init) -+ abort (); -+} -+ -+ -+ -+void -+test_xor (short* v) -+{ -+ *v = init; -+ count = 0; -+ -+ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); -+ if (*v != 0) -+ abort (); -+ -+ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); -+ if (*v != init) -+ abort (); -+ -+ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); -+ if (*v != init) -+ abort (); -+ -+ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); -+ if (*v != 0) -+ abort (); -+} -+ -+void -+test_or (short* v) -+{ -+ *v = 0; -+ count = 1; -+ -+ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); -+ if (*v != 1) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); -+ if (*v != 3) -+ abort (); -+ -+ count *= 2; -+ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); -+ if (*v != 7) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); -+ if (*v != 15) -+ abort (); -+ -+ count *= 2; -+ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); -+ if (*v != 31) -+ abort (); -+ -+ count *= 2; -+ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); -+ if (*v != 63) -+ abort (); -+} -+ -+int -+main () { -+ short* V = {&A.a, &A.b}; -+ -+ for (int i = 0; i < 2; i++) { -+ test_fetch_add (Vi); -+ test_fetch_sub (Vi); -+ test_fetch_and (Vi); -+ test_fetch_nand (Vi); -+ test_fetch_xor (Vi); -+ test_fetch_or (Vi); -+ -+ test_add_fetch (Vi); -+ test_sub_fetch (Vi); -+ test_and_fetch (Vi); -+ test_nand_fetch (Vi); -+ test_xor_fetch (Vi); -+ test_or_fetch (Vi); -+ -+ test_add (Vi); -+ test_sub (Vi); -+ test_and (Vi); -+ test_nand 
(Vi); -+ test_xor (Vi); -+ test_or (Vi); -+ } -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c -new file mode 100644 -index 00000000000..52093894a79 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c -@@ -0,0 +1,87 @@ -+/* Test __atomic routines for existence and proper execution on 1 byte -+ values with each valid memory model. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics" } */ -+ -+/* Test the execution of the __atomic_compare_exchange_n builtin for a char. */ -+ -+extern void abort(void); -+ -+char v = 0; -+char expected = 0; -+char max = ~0; -+char desired = ~0; -+char zero = 0; -+ -+#define STRONG 0 -+#define WEAK 1 -+ -+int -+main () -+{ -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != max) -+ abort (); -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != max) -+ abort (); -+ if (v != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ if (v != max) -+ abort (); -+ -+ /* Now test the generic version. 
*/ -+ -+ v = 0; -+ -+ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != max) -+ abort (); -+ -+ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != max) -+ abort (); -+ if (v != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ if (v != max) -+ abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c -new file mode 100644 -index 00000000000..8fee8c44811 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c -@@ -0,0 +1,87 @@ -+/* Test __atomic routines for existence and proper execution on 2 byte -+ values with each valid memory model. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics" } */ -+ -+/* Test the execution of the __atomic_compare_exchange_n builtin for a short. 
*/ -+ -+extern void abort(void); -+ -+short v = 0; -+short expected = 0; -+short max = ~0; -+short desired = ~0; -+short zero = 0; -+ -+#define STRONG 0 -+#define WEAK 1 -+ -+int -+main () -+{ -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != max) -+ abort (); -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != max) -+ abort (); -+ if (v != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ if (v != max) -+ abort (); -+ -+ /* Now test the generic version. 
*/ -+ -+ v = 0; -+ -+ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) -+ abort (); -+ if (expected != max) -+ abort (); -+ -+ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != max) -+ abort (); -+ if (v != 0) -+ abort (); -+ -+ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ -+ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) -+ abort (); -+ if (expected != 0) -+ abort (); -+ if (v != max) -+ abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c -new file mode 100644 -index 00000000000..24c344c0ce3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c -@@ -0,0 +1,69 @@ -+/* Test __atomic routines for existence and proper execution on 1 byte -+ values with each valid memory model. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics" } */ -+ -+/* Test the execution of the __atomic_exchange_n builtin for a char. 
*/ -+ -+extern void abort(void); -+ -+char v, count, ret; -+ -+int -+main () -+{ -+ v = 0; -+ count = 0; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) -+ abort (); -+ count++; -+ -+ /* Now test the generic version. */ -+ -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c -new file mode 100644 -index 00000000000..edc212df04e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c -@@ -0,0 +1,69 @@ -+/* Test __atomic routines for existence and proper execution on 2 byte -+ values with each valid memory model. */ -+/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */ -+/* { dg-do run } */ -+/* { dg-options "-minline-atomics" } */ -+ -+/* Test the execution of the __atomic_X builtin for a short. 
*/ -+ -+extern void abort(void); -+ -+short v, count, ret; -+ -+int -+main () -+{ -+ v = 0; -+ count = 0; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) -+ abort (); -+ count++; -+ -+ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) -+ abort (); -+ count++; -+ -+ /* Now test the generic version. */ -+ -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); -+ if (ret != count - 1 || v != count) -+ abort (); -+ count++; -+ -+ return 0; -+} -diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c -index 69f53623509..573d163ea04 100644 ---- a/libgcc/config/riscv/atomic.c -+++ b/libgcc/config/riscv/atomic.c -@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define INVERT "not %tmp1, %tmp1\n\t" - #define DONT_INVERT "" - -+/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */ -+ - #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop) \ - type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v) \ - { \ --- -2.39.2 -
View file
_service:tar_scm:0009-MULL64-Disable-mull64-transformation-by-default.patch
Added
@@ -0,0 +1,66 @@ +From 7c1f4425c680ea144d29bc55a1283d46444a2691 Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Wed, 7 Dec 2022 09:43:15 +0800 +Subject: PATCH 09/22 MULL64 Disable mull64 transformation by default + +This commit disables mull64 transformation by default since +it shows some runtime failure in workloads. + +This is a workaround fix for https://gitee.com/src-openeuler/gcc/issues/I64UQH +--- + gcc/match.pd | 2 +- + gcc/opts.cc | 1 - + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +- + gcc/testsuite/gcc.dg/pr107190.c | 2 +- + 4 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index b7e3588e8..6f24d5079 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4290,7 +4290,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (simplify + (cond @0 (op:s @1 integer_pow2p@2) @1) + /* powerof2cst */ +- (if (INTEGRAL_TYPE_P (type)) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)) + (with { + tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); + } +diff --git a/gcc/opts.cc b/gcc/opts.cc +index eae71ed20..a97630d1c 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -647,7 +647,6 @@ static const struct default_options default_options_table = + VECT_COST_MODEL_VERY_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, +- { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index f61cf5e6f..cad891e62 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ ++/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +index 235b2761a..d1e72e5df 100644 +--- a/gcc/testsuite/gcc.dg/pr107190.c ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +-- +2.33.0 +
View file
_service:tar_scm:0009-riscv-linux-Don-t-add-latomic-with-pthread.patch
Deleted
@@ -1,41 +0,0 @@ -From 203f3060dd363361b172f7295f42bb6bf5ac0b3b Mon Sep 17 00:00:00 2001 -From: Andreas Schwab <schwab@suse.de> -Date: Sat, 23 Apr 2022 15:48:42 +0200 -Subject: PATCH riscv/linux: Don't add -latomic with -pthread - -Now that we have support for inline subword atomic operations, it is no -longer necessary to link against libatomic. This also fixes testsuite -failures because the framework does not properly set up the linker flags -for finding libatomic. -The use of atomic operations is also independent of the use of libpthread. - -gcc/ - * config/riscv/linux.h (LIB_SPEC): Don't redefine. ---- - gcc/config/riscv/linux.h | 10 ---------- - 1 file changed, 10 deletions(-) - -diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h -index b9557a75dc7..2fdfd930cf2 100644 ---- a/gcc/config/riscv/linux.h -+++ b/gcc/config/riscv/linux.h -@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3. If not see - #undef MUSL_DYNAMIC_LINKER - #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1" - --/* Because RISC-V only has word-sized atomics, it requries libatomic where -- others do not. So link libatomic by default, as needed. */ --#undef LIB_SPEC --#ifdef LD_AS_NEEDED_OPTION --#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \ -- " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}" --#else --#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic " --#endif -- - #define ICACHE_FLUSH_FUNC "__riscv_flush_icache" - - #define CPP_SPEC "%{pthread:-D_REENTRANT}" --- -2.39.2 -
View file
_service:tar_scm:0010-Version-Clear-DATESTAMP_s.patch
Added
@@ -0,0 +1,26 @@ +From 8e8f783b02df155e3aafa94af6cc1f66604e08eb Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Fri, 21 Jul 2023 14:45:27 +0800 +Subject: PATCH 10/22 Version Clear DATESTAMP_s + +--- + gcc/Makefile.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..db2a0e1bd 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -897,8 +897,7 @@ PATCHLEVEL_c := \ + # significant - do not remove it. + BASEVER_s := "\"$(BASEVER_c)\"" + DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" +-DATESTAMP_s := \ +- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" ++DATESTAMP_s := "\"\"" + PKGVERSION_s:= "\"@PKGVERSION@\"" + BUGURL_s := "\"@REPORT_BUGS_TO@\"" + +-- +2.33.0 +
View file
_service:tar_scm:0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch
Added
@@ -0,0 +1,124 @@ +From 355eb8e20327242442d139fb052d3a3befde3dd7 Mon Sep 17 00:00:00 2001 +From: "Cui,Lili" <lili.cui@intel.com> +Date: Tue, 1 Nov 2022 09:16:49 +0800 +Subject: PATCH 11/22 Add attribute hot judgement for INLINE_HINT_known_hot + hint. + +We set up INLINE_HINT_known_hot hint only when we have profile feedback, +now add function attribute judgement for it, when both caller and callee +have __attribute__((hot)), we will also set up INLINE_HINT_known_hot hint +for it. + +With this patch applied, +ADL Multi-copy: 538.imagic_r 16.7% +ICX Multi-copy: 538.imagic_r 15.2% +CLX Multi-copy: 538.imagic_r 12.7% +Znver3 Multi-copy: 538.imagic_r 10.6% +Arm Multi-copy: 538.imagic_r 13.4% + +gcc/ChangeLog + + * ipa-inline-analysis.cc (do_estimate_edge_time): Add function attribute + judgement for INLINE_HINT_known_hot hint. + +gcc/testsuite/ChangeLog: + + * gcc.dg/ipa/inlinehint-6.c: New test. +--- + gcc/ipa-inline-analysis.cc | 13 ++++--- + gcc/testsuite/gcc.dg/ipa/inlinehint-6.c | 47 +++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-6.c + +diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc +index 11d8d09ee..16ac24cfc 100644 +--- a/gcc/ipa-inline-analysis.cc ++++ b/gcc/ipa-inline-analysis.cc +@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-utils.h" + #include "cfgexpand.h" + #include "gimplify.h" ++#include "attribs.h" + + /* Cached node/edge growths. */ + fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL; +@@ -249,15 +250,19 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) + hints = estimates.hints; + } + +- /* When we have profile feedback, we can quite safely identify hot +- edges and for those we disable size limits. 
Don't do that when +- probability that caller will call the callee is low however, since it ++ /* When we have profile feedback or function attribute, we can quite safely ++ identify hot edges and for those we disable size limits. Don't do that ++ when probability that caller will call the callee is low however, since it + may hurt optimization of the caller's hot path. */ +- if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () ++ if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p () + && (edge->count.ipa ().apply_scale (2, 1) + > (edge->caller->inlined_to + ? edge->caller->inlined_to->count.ipa () + : edge->caller->count.ipa ()))) ++ || (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)) ++ != NULL ++ && lookup_attribute ("hot", DECL_ATTRIBUTES (edge->callee->decl)) ++ != NULL)) + hints |= INLINE_HINT_known_hot; + + gcc_checking_assert (size >= 0); +diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +new file mode 100644 +index 000000000..1f3be641c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +@@ -0,0 +1,47 @@ ++/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp" } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#define size_t long long int ++ ++struct A ++{ ++ size_t f1, f2, f3, f4; ++}; ++struct C ++{ ++ struct A a; ++ size_t b; ++}; ++struct C x; ++ ++__attribute__((hot)) struct C callee (struct A *a, struct C *c) ++{ ++ c->a=(*a); ++ ++ if((c->b + 7) & 17) ++ { ++ c->a.f1 = c->a.f2 + c->a.f1; ++ c->a.f2 = c->a.f3 - c->a.f2; ++ c->a.f3 = c->a.f2 + c->a.f3; ++ c->a.f4 = c->a.f2 - c->a.f4; ++ c->b = c->a.f2; ++ ++ } ++ return *c; ++} ++ ++__attribute__((hot)) struct C caller (size_t d, size_t e, size_t f, size_t g, struct C *c) ++{ ++ struct A a; ++ a.f1 = 1 + d; ++ a.f2 = e; ++ a.f3 = 12 + f; ++ a.f4 = 68 + g; ++ if (c->b > 0) ++ return callee (&a, c); ++ else ++ return *c; ++} ++ ++/* { dg-final { scan-ipa-dump "known_hot" "inline" } 
} */ ++ +-- +2.33.0 +
View file
_service:tar_scm:0012-Enable-small-loop-unrolling-for-O2.patch
Added
@@ -0,0 +1,490 @@ +From 1070bc24f53e851cae55320e26715cc594efcd2f Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Thu, 8 Sep 2022 16:52:02 +0800 +Subject: PATCH 12/22 Enable small loop unrolling for O2 + +Modern processors has multiple way instruction decoders +For x86, icelake/zen3 has 5 uops, so for small loop with <= 4 +instructions (usually has 3 uops with a cmp/jmp pair that can be +macro-fused), the decoder would have 2 uops bubble for each iteration +and the pipeline could not be fully utilized. + +Therefore, this patch enables loop unrolling for small size loop at O2 +to fullfill the decoder as much as possible. It turns on rtl loop +unrolling when targetm.loop_unroll_adjust exists and O2 plus speed only. +In x86 backend the default behavior is to unroll small loops with less +than 4 insns by 1 time. + +This improves 548.exchange2 by 9% on icelake and 7.4% on zen3 with +0.9% codesize increment. For other benchmarks the variants are minor +and overall codesize increased by 0.2%. + +The kernel image size increased by 0.06%, and no impact on eembc. + +gcc/ChangeLog: + + * common/config/i386/i386-common.cc (ix86_optimization_table): + Enable small loop unroll at O2 by default. + * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll + factor if -munroll-only-small-loops enabled and -funroll-loops/ + -funroll-all-loops are disabled. + * config/i386/i386.h (struct processor_costs): Add 2 field + small_unroll_ninsns and small_unroll_factor. + * config/i386/i386.opt: Add -munroll-only-small-loops. + * doc/invoke.texi: Document -munroll-only-small-loops. + * loop-init.cc (pass_rtl_unroll_loops::gate): Enable rtl + loop unrolling for -O2-speed and above if target hook + loop_unroll_adjust exists. + (pass_rtl_unroll_loops::execute): Set UAP_UNROLL flag + when target hook loop_unroll_adjust exists. + * config/i386/x86-tune-costs.h: Update all processor costs + with small_unroll_ninsns = 4 and small_unroll_factor = 2. 
+ +gcc/testsuite/ChangeLog: + + * gcc.dg/guality/loop-1.c: Add additional option + -mno-unroll-only-small-loops. + * gcc.target/i386/pr86270.c: Add -mno-unroll-only-small-loops. + * gcc.target/i386/pr93002.c: Likewise. +--- + gcc/common/config/i386/i386-common.cc | 1 + + gcc/config/i386/i386.cc | 18 ++++++++ + gcc/config/i386/i386.h | 5 +++ + gcc/config/i386/i386.opt | 4 ++ + gcc/config/i386/x86-tune-costs.h | 58 +++++++++++++++++++++++++ + gcc/doc/invoke.texi | 11 ++++- + gcc/loop-init.cc | 10 +++-- + gcc/testsuite/gcc.dg/guality/loop-1.c | 2 + + gcc/testsuite/gcc.target/i386/pr86270.c | 2 +- + gcc/testsuite/gcc.target/i386/pr93002.c | 2 +- + 10 files changed, 107 insertions(+), 6 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index e2594cae4..cdd5caa55 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1687,6 +1687,7 @@ static const struct default_options ix86_option_optimization_table = + /* The STC algorithm produces the smallest code at -Os, for x86. */ + { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL, + REORDER_BLOCKS_ALGORITHM_STC }, ++ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, + /* Turn off -fschedule-insns by default. It tends to make the + problem with not enough registers even worse. */ + { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 9a9ff3b34..e56004300 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -23570,6 +23570,24 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) + unsigned i; + unsigned mem_count = 0; + ++ /* Unroll small size loop when unroll factor is not explicitly ++ specified. */ ++ if (!(flag_unroll_loops ++ || flag_unroll_all_loops ++ || loop->unroll)) ++ { ++ nunroll = 1; ++ ++ /* Any explicit -f{no-}unroll-{all-}loops turns off ++ -munroll-only-small-loops. 
*/ ++ if (ix86_unroll_only_small_loops ++ && !OPTION_SET_P (flag_unroll_loops) ++ && loop->ninsns <= ix86_cost->small_unroll_ninsns) ++ nunroll = ix86_cost->small_unroll_factor; ++ ++ return nunroll; ++ } ++ + if (!TARGET_ADJUST_UNROLL) + return nunroll; + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index fce0b3564..688aaabd3 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -219,6 +219,11 @@ struct processor_costs { + const char *const align_jump; /* Jump alignment. */ + const char *const align_label; /* Label alignment. */ + const char *const align_func; /* Function alignment. */ ++ ++ const unsigned small_unroll_ninsns; /* Insn count limit for small loop ++ to be unrolled. */ ++ const unsigned small_unroll_factor; /* Unroll factor for small loop to ++ be unrolled. */ + }; + + extern const struct processor_costs *ix86_cost; +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index a3675e515..fc1b944ac 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1214,3 +1214,7 @@ Do not use GOT to access external symbols. + -param=x86-stlf-window-ninsns= + Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param + Instructions number above which STFL stall penalty can be compensated. ++ ++munroll-only-small-loops ++Target Var(ix86_unroll_only_small_loops) Init(0) Save ++Enable conservative small loop unrolling. +diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h +index f105d57ca..db4c2da34 100644 +--- a/gcc/config/i386/x86-tune-costs.h ++++ b/gcc/config/i386/x86-tune-costs.h +@@ -135,6 +135,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + /* Processor costs (relative to an add) */ +@@ -244,6 +246,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ + "4", /* Jump alignment. */ + NULL, /* Label alignment. */ + "4", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs i486_memcpy2 = { +@@ -354,6 +358,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs pentium_memcpy2 = { +@@ -462,6 +468,8 @@ struct processor_costs pentium_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static const +@@ -563,6 +571,8 @@ struct processor_costs lakemont_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes +@@ -679,6 +689,8 @@ struct processor_costs pentiumpro_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs geode_memcpy2 = { +@@ -786,6 +798,8 @@ struct processor_costs geode_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs k6_memcpy2 = { +@@ -896,6 +910,8 @@ struct processor_costs k6_cost = { + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + /* For some reason, Athlon deals better with REP prefix (relative to loops) +@@ -1007,6 +1023,8 @@ struct processor_costs athlon_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* K8 has optimized REP instruction for medium sized blocks, but for very +@@ -1127,6 +1145,8 @@ struct processor_costs k8_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for +@@ -1255,6 +1275,8 @@ struct processor_costs amdfam10_cost = { + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* BDVER has optimized REP instruction for medium sized blocks, but for +@@ -1376,6 +1398,8 @@ const struct processor_costs bdver_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + +@@ -1529,6 +1553,8 @@ struct processor_costs znver1_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* ZNVER2 has optimized REP instruction for medium sized blocks, but for +@@ -1686,6 +1712,8 @@ struct processor_costs znver2_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + struct processor_costs znver3_cost = { +@@ -1818,6 +1846,8 @@ struct processor_costs znver3_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. 
*/ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* This table currently replicates znver3_cost table. */ +@@ -1952,6 +1982,8 @@ struct processor_costs znver4_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ +@@ -2076,6 +2108,8 @@ struct processor_costs skylake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* icelake_cost should produce code tuned for Icelake family of CPUs. +@@ -2202,6 +2236,8 @@ struct processor_costs icelake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* alderlake_cost should produce code tuned for alderlake family of CPUs. */ +@@ -2322,6 +2358,8 @@ struct processor_costs alderlake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* BTVER1 has optimized REP instruction for medium sized blocks, but for +@@ -2435,6 +2473,8 @@ const struct processor_costs btver1_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs btver2_memcpy2 = { +@@ -2545,6 +2585,8 @@ const struct processor_costs btver2_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + static stringop_algs pentium4_memcpy2 = { +@@ -2654,6 +2696,8 @@ struct processor_costs pentium4_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs nocona_memcpy2 = { +@@ -2766,6 +2810,8 @@ struct processor_costs nocona_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs atom_memcpy2 = { +@@ -2876,6 +2922,8 @@ struct processor_costs atom_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs slm_memcpy2 = { +@@ -2986,6 +3034,8 @@ struct processor_costs slm_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs tremont_memcpy2 = { +@@ -3110,6 +3160,8 @@ struct processor_costs tremont_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs intel_memcpy2 = { +@@ -3220,6 +3272,8 @@ struct processor_costs intel_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* Generic should produce code tuned for Core-i7 (and newer chips) +@@ -3339,6 +3393,8 @@ struct processor_costs generic_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + /* core_cost should produce code tuned for Core familly of CPUs. */ +@@ -3465,5 +3521,7 @@ struct processor_costs core_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..16f4b367e 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1449,7 +1449,8 @@ See RS/6000 and PowerPC Options. + -mgeneral-regs-only -mcall-ms2sysv-xlogues -mrelax-cmpxchg-loop @gol + -mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol + -mindirect-branch-register -mharden-sls=@var{choice} @gol +--mindirect-branch-cs-prefix -mneeded -mno-direct-extern-access} ++-mindirect-branch-cs-prefix -mneeded -mno-direct-extern-access @gol ++-munroll-only-small-loops} + + @emph{x86 Windows Options} + @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol +@@ -33183,6 +33184,14 @@ treat access to protected symbols as local symbols. The default is + @option{-mno-direct-extern-access} and executable compiled with + @option{-mdirect-extern-access} may not be binary compatible if + protected symbols are used in shared libraries and executable. ++ ++@item -munroll-only-small-loops ++@opindex munroll-only-small-loops ++@opindex mno-unroll-only-small-loops ++Controls conservative small loop unrolling. It is default enabled by ++O2, and unrolls loop with less than 4 insns by 1 time. Explicit ++-fno-unroll-all-loops would disable this flag to avoid any ++unintended unrolling behavior that user does not want. 
+ @end table + + @node x86 Windows Options +diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc +index 1e4f6cfd7..f1c717041 100644 +--- a/gcc/loop-init.cc ++++ b/gcc/loop-init.cc +@@ -565,9 +565,12 @@ public: + {} + + /* opt_pass methods: */ +- virtual bool gate (function *) ++ virtual bool gate (function *fun) + { +- return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll); ++ return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll ++ || (targetm.loop_unroll_adjust ++ && optimize >= 2 ++ && optimize_function_for_speed_p (fun))); + } + + virtual unsigned int execute (function *); +@@ -583,7 +586,8 @@ pass_rtl_unroll_loops::execute (function *fun) + if (dump_file) + df_dump (dump_file); + +- if (flag_unroll_loops) ++ if (flag_unroll_loops ++ || targetm.loop_unroll_adjust) + flags |= UAP_UNROLL; + if (flag_unroll_all_loops) + flags |= UAP_UNROLL_ALL; +diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c +index 1b1f6d322..a32ea445a 100644 +--- a/gcc/testsuite/gcc.dg/guality/loop-1.c ++++ b/gcc/testsuite/gcc.dg/guality/loop-1.c +@@ -1,5 +1,7 @@ + /* { dg-do run } */ + /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */ ++/* { dg-additional-options "-mno-unroll-only-small-loops" { target ia32 } } */ ++ + + #include "../nop.h" + +diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c +index 81841ef5b..cbc9fbb04 100644 +--- a/gcc/testsuite/gcc.target/i386/pr86270.c ++++ b/gcc/testsuite/gcc.target/i386/pr86270.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2" } */ ++/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ + + int *a; + long len; +diff --git a/gcc/testsuite/gcc.target/i386/pr93002.c b/gcc/testsuite/gcc.target/i386/pr93002.c +index 0248fcc00..f75a847f7 100644 +--- a/gcc/testsuite/gcc.target/i386/pr93002.c ++++ b/gcc/testsuite/gcc.target/i386/pr93002.c +@@ -1,6 +1,6 @@ + /* PR target/93002 */ + /* { dg-do compile } */ +-/* 
{ dg-options "-O2" } */ ++/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ + /* { dg-final { scan-assembler-not "cmp\^\n\r*-1" } } */ + + volatile int sink; +-- +2.33.0 +
View file
_service:tar_scm:0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch
Added
@@ -0,0 +1,230 @@ +From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Sat, 19 Nov 2022 09:38:00 +0800 +Subject: PATCH 13/22 i386: Only enable small loop unrolling in backend PR + 107692 + +Followed by the discussion in pr107692, -munroll-only-small-loops +Does not turns on/off -funroll-loops, and current check in +pass_rtl_unroll_loops::gate would cause -fno-unroll-loops do not take +effect. Revert the change about targetm.loop_unroll_adjust and apply +the backend option change to strictly follow the rule that +-funroll-loops takes full control of loop unrolling, and +munroll-only-small-loops just change its behavior to unroll small size +loops. + +gcc/ChangeLog: + + PR target/107692 + * common/config/i386/i386-common.cc (ix86_optimization_table): + Enable loop unroll O2, disable -fweb and -frename-registers + by default. + * config/i386/i386-options.cc + (ix86_override_options_after_change): + Disable small loop unroll when funroll-loops enabled, reset + cunroll_grow_size when it is not explicitly enabled. + (ix86_option_override_internal): Call + ix86_override_options_after_change instead of calling + ix86_recompute_optlev_based_flags and ix86_default_align + separately. + * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll + factor if -munroll-only-small-loops enabled. + * loop-init.cc (pass_rtl_unroll_loops::gate): Do not enable + loop unrolling for -O2-speed. + (pass_rtl_unroll_loops::execute): Rmove + targetm.loop_unroll_adjust check. + +gcc/testsuite/ChangeLog: + + PR target/107692 + * gcc.dg/guality/loop-1.c: Remove additional option for ia32. + * gcc.target/i386/pr86270.c: Add -fno-unroll-loops. + * gcc.target/i386/pr93002.c: Likewise. 
+--- + gcc/common/config/i386/i386-common.cc | 8 ++++++ + gcc/config/i386/i386-options.cc | 34 ++++++++++++++++++++++--- + gcc/config/i386/i386.cc | 18 ++++--------- + gcc/loop-init.cc | 10 +++----- + gcc/testsuite/gcc.dg/guality/loop-1.c | 2 -- + gcc/testsuite/gcc.target/i386/pr86270.c | 2 +- + gcc/testsuite/gcc.target/i386/pr93002.c | 2 +- + 7 files changed, 48 insertions(+), 28 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index cdd5caa55..f650e255f 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1687,7 +1687,15 @@ static const struct default_options ix86_option_optimization_table = + /* The STC algorithm produces the smallest code at -Os, for x86. */ + { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL, + REORDER_BLOCKS_ALGORITHM_STC }, ++ ++ /* Turn on -funroll-loops with -munroll-only-small-loops to enable small ++ loop unrolling at -O2. */ ++ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, ++ /* Turns off -frename-registers and -fweb which are enabled by ++ funroll-loops. */ ++ { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 }, ++ { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 }, + /* Turn off -fschedule-insns by default. It tends to make the + problem with not enough registers even worse. */ + { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 099cec4b6..ff44ad4e0 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1816,8 +1816,37 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts, + void + ix86_override_options_after_change (void) + { ++ /* Default align_* from the processor table. 
*/ + ix86_default_align (&global_options); ++ + ix86_recompute_optlev_based_flags (&global_options, &global_options_set); ++ ++ /* Disable unrolling small loops when there's explicit ++ -f{,no}unroll-loop. */ ++ if ((OPTION_SET_P (flag_unroll_loops)) ++ || (OPTION_SET_P (flag_unroll_all_loops) ++ && flag_unroll_all_loops)) ++ { ++ if (!OPTION_SET_P (ix86_unroll_only_small_loops)) ++ ix86_unroll_only_small_loops = 0; ++ /* Re-enable -frename-registers and -fweb if funroll-loops ++ enabled. */ ++ if (!OPTION_SET_P (flag_web)) ++ flag_web = flag_unroll_loops; ++ if (!OPTION_SET_P (flag_rename_registers)) ++ flag_rename_registers = flag_unroll_loops; ++ /* -fcunroll-grow-size default follws -fno-unroll-loops. */ ++ if (!OPTION_SET_P (flag_cunroll_grow_size)) ++ flag_cunroll_grow_size = flag_unroll_loops ++ || flag_peel_loops ++ || optimize >= 3; ++ } ++ else ++ { ++ if (!OPTION_SET_P (flag_cunroll_grow_size)) ++ flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; ++ } ++ + } + + /* Clear stack slot assignments remembered from previous functions. +@@ -2329,7 +2358,7 @@ ix86_option_override_internal (bool main_args_p, + + set_ix86_tune_features (opts, ix86_tune, opts->x_ix86_dump_tunes); + +- ix86_recompute_optlev_based_flags (opts, opts_set); ++ ix86_override_options_after_change (); + + ix86_tune_cost = processor_cost_tableix86_tune; + /* TODO: ix86_cost should be chosen at instruction or function granuality +@@ -2360,9 +2389,6 @@ ix86_option_override_internal (bool main_args_p, + || TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_regparm = REGPARM_MAX; + +- /* Default align_* from the processor table. */ +- ix86_default_align (opts); +- + /* Provide default for -mbranch-cost= value. 
*/ + SET_OPTION_IF_UNSET (opts, opts_set, ix86_branch_cost, + ix86_tune_cost->branch_cost); +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index e56004300..462dce10e 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -23572,20 +23572,12 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) + + /* Unroll small size loop when unroll factor is not explicitly + specified. */ +- if (!(flag_unroll_loops +- || flag_unroll_all_loops +- || loop->unroll)) ++ if (ix86_unroll_only_small_loops && !loop->unroll) + { +- nunroll = 1; +- +- /* Any explicit -f{no-}unroll-{all-}loops turns off +- -munroll-only-small-loops. */ +- if (ix86_unroll_only_small_loops +- && !OPTION_SET_P (flag_unroll_loops) +- && loop->ninsns <= ix86_cost->small_unroll_ninsns) +- nunroll = ix86_cost->small_unroll_factor; +- +- return nunroll; ++ if (loop->ninsns <= ix86_cost->small_unroll_ninsns) ++ return MIN (nunroll, ix86_cost->small_unroll_factor); ++ else ++ return 1; + } + + if (!TARGET_ADJUST_UNROLL) +diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc +index f1c717041..1e4f6cfd7 100644 +--- a/gcc/loop-init.cc ++++ b/gcc/loop-init.cc +@@ -565,12 +565,9 @@ public: + {} + + /* opt_pass methods: */ +- virtual bool gate (function *fun) ++ virtual bool gate (function *) + { +- return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll +- || (targetm.loop_unroll_adjust +- && optimize >= 2 +- && optimize_function_for_speed_p (fun))); ++ return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll); + } + + virtual unsigned int execute (function *); +@@ -586,8 +583,7 @@ pass_rtl_unroll_loops::execute (function *fun) + if (dump_file) + df_dump (dump_file); + +- if (flag_unroll_loops +- || targetm.loop_unroll_adjust) ++ if (flag_unroll_loops) + flags |= UAP_UNROLL; + if (flag_unroll_all_loops) + flags |= UAP_UNROLL_ALL; +diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c +index a32ea445a..1b1f6d322 
100644 +--- a/gcc/testsuite/gcc.dg/guality/loop-1.c ++++ b/gcc/testsuite/gcc.dg/guality/loop-1.c +@@ -1,7 +1,5 @@ + /* { dg-do run } */ + /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */ +-/* { dg-additional-options "-mno-unroll-only-small-loops" { target ia32 } } */ +- + + #include "../nop.h" + +diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c +index cbc9fbb04..98b012caf 100644 +--- a/gcc/testsuite/gcc.target/i386/pr86270.c ++++ b/gcc/testsuite/gcc.target/i386/pr86270.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ ++/* { dg-options "-O2 -fno-unroll-loops" } */ + + int *a; + long len; +diff --git a/gcc/testsuite/gcc.target/i386/pr93002.c b/gcc/testsuite/gcc.target/i386/pr93002.c +index f75a847f7..7e2d869e1 100644 +--- a/gcc/testsuite/gcc.target/i386/pr93002.c ++++ b/gcc/testsuite/gcc.target/i386/pr93002.c +@@ -1,6 +1,6 @@ + /* PR target/93002 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ ++/* { dg-options "-O2 -fno-unroll-loops" } */ + /* { dg-final { scan-assembler-not "cmp\^\n\r*-1" } } */ + + volatile int sink; +-- +2.33.0 +
View file
_service:tar_scm:0014-Array-widen-compare-Add-a-new-optimization-for-array.patch
Added
@@ -0,0 +1,1981 @@ +From 5ef5f6c4ae806f56ff81450c759f36d59b5b23db Mon Sep 17 00:00:00 2001 +From: dingguangya <dingguangya1@huawei.com> +Date: Sat, 29 Jul 2023 17:45:01 +0800 +Subject: PATCH 14/22 Array-widen-compare Add a new optimization for array + comparison scenarios + +Add option farray-widen-compare. +For an array pointer whose element is a single-byte type, +by changing the pointer type to a long-byte type, the elements +can be combined and compared after loading. +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 5 + + gcc/doc/invoke.texi | 13 +- + gcc/passes.def | 1 + + .../gcc.dg/tree-ssa/awiden-compare-1.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-2.c | 90 + + .../gcc.dg/tree-ssa/awiden-compare-3.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-4.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-5.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-6.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-7.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-8.c | 24 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-array-widen-compare.cc | 1555 +++++++++++++++++ + 15 files changed, 1813 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c + create mode 100644 gcc/tree-ssa-loop-array-widen-compare.cc + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..0aabc6ea3 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1653,6 +1653,7 @@ OBJS = \ + tree-ssa-loop-ivopts.o \ + tree-ssa-loop-manip.o \ + tree-ssa-loop-niter.o \ 
++ tree-ssa-loop-array-widen-compare.o \ + tree-ssa-loop-prefetch.o \ + tree-ssa-loop-split.o \ + tree-ssa-loop-unswitch.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index e365a48bc..4d91ce8cf 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables + Common Var(flag_asynchronous_unwind_tables) Optimization + Generate unwind tables that are exact at each instruction boundary. + ++farray-widen-compare ++Common Var(flag_array_widen_compare) Optimization ++Extends types for pointers to arrays to improve array comparsion performance. ++In some extreme situations this may result in unsafe behavior. ++ + fauto-inc-dec + Common Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..a11e2c24b 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}. + -falign-loops=@var{n}:@var{m}:@var{n2}:@var{m2} @gol + -fno-allocation-dce -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile=@var{path} @gol +--fauto-inc-dec -fbranch-probabilities @gol ++-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol + -fcaller-saves @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol +@@ -11387,6 +11387,17 @@ This pass is always skipped on architectures that do not have + instructions to support this. Enabled by default at @option{-O1} and + higher on architectures that support this. + ++@item -farray-widen-compare ++@opindex farray-widen-compare ++In the narrow-byte array comparison scenario, the types of pointers ++pointing to array are extended so that elements of multiple bytes can ++be loaded at a time when a wide type is used to dereference an array, ++thereby improving the performance of this comparison scenario. In some ++extreme situations this may result in unsafe behavior. 
++ ++This option may generate better or worse code; results are highly dependent ++on the structure of loops within the source code. ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. +diff --git a/gcc/passes.def b/gcc/passes.def +index 375d3d62d..8dbb7983e 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_dse); + NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */); + NEXT_PASS (pass_phiopt, true /* early_p */); ++ NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_if_to_switch); + NEXT_PASS (pass_convert_switch); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +new file mode 100644 +index 000000000..e18ef5ec1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pblen != curlen) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +new file mode 100644 +index 000000000..f4b20b43c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define EMPTY_HASH_VALUE 0 ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++#define true 1 ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++ ++lzma_match * ++func ( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (true) ++ { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ { ++ *ptr0 = EMPTY_HASH_VALUE; ++ *ptr1 = EMPTY_HASH_VALUE; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? 
cyclic_size : 0)) << 1); ++ ++ const uint8_t *const pb = cur -delta; ++ uint32_t len = my_min(len0, len1); ++ ++ if (pblen == curlen) ++ { ++ while (++len != len_limit) ++ if (pblen != curlen) ++ break; ++ ++ if (len_best < len) ++ { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair0; ++ *ptr0 = pair1; ++ return matches; ++ } ++ } ++ } ++ ++ if (pblen < curlen) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +new file mode 100644 +index 000000000..86f5e7a1e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pblen != curlen) ++ break; ++ len = len + 1; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +new file mode 100644 +index 000000000..d66558699 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pblen != curlen) ++ break; ++ len = len + 2; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +new file mode 100644 +index 000000000..e3e12bca4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pblen != curlen-1) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +new file mode 100644 +index 000000000..b8500735e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len++ != len_limit) ++ if (pblen != curlen) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +new file mode 100644 +index 000000000..977bf5685 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ len = len + 1; ++ if (pblen != curlen) ++ break; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +new file mode 100644 +index 000000000..386784c92 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ { ++ if (pblen != curlen) ++ { ++ len = len - 1; ++ break; ++ } ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2dae5e1c7..794b8017d 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -216,6 +216,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization") + DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") ++DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 606d1d60b..55ee2fe7f 100644 
+--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -453,6 +453,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-array-widen-compare.cc b/gcc/tree-ssa-loop-array-widen-compare.cc +new file mode 100644 +index 000000000..ba6170fa0 +--- /dev/null ++++ b/gcc/tree-ssa-loop-array-widen-compare.cc +@@ -0,0 +1,1555 @@ ++/* Array widen compare. ++ Copyright (C) 2022-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++/* This pass handles scenarios similar to the following: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ while (++len != len_limit) ++ if (pblen != curlen) ++ break; ++ return len; ++ } ++ ++ Features of this type of loop: ++ 1) the loop has two exits; ++ 2) One of the exits comes from the comparison result of the array; ++ ++ From the source code point of view, the pass completes the conversion of the ++ above scenario into: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ // align_loop ++ for(++len; len + sizeof(uint64_t) <= len_limit; len += sizeof (uint64_t)) ++ { ++ uint64_t a = *((uint64_t*)(cur+len)); ++ uint64_t b = *((uint64_t*)(pb+len)); ++ if (a != b) ++ { ++ int lz = __builtin_ctzll (a ^ b); ++ len += lz / 8; ++ return len; ++ } ++ } ++ // epilogue_loop ++ for (;len != len_limit; ++len) ++ if (pblen != curlen) ++ break; ++ return len; ++ } ++ ++ This pass is to complete the conversion of such scenarios from the internal ++ perspective of the compiler: ++ 1) determine_loop_form: The function completes the screening of such ++ scenarios; ++ 2) 
convert_to_new_loop: The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++ 3) origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, growth step size ++ of loop induction variable, initial value ++ of induction variable, etc; ++ 4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ ++ ++/* The useful information of origin loop. */ ++ ++struct origin_loop_info ++{ ++ tree base; /* The initial index of the array in the old loop. */ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree arr1; /* Array 1 in the old loop. */ ++ tree arr2; /* Array 2 in the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ basic_block exit_bb1; ++ basic_block exit_bb2; ++ edge exit_e1; ++ edge exit_e2; ++ gimple *cond_stmt1; ++ gimple *cond_stmt2; ++ gimple *update_stmt; ++ bool exist_prolog_assgin; ++ /* Whether the marker has an initial value assigned ++ to the array index. */ ++ unsigned HOST_WIDE_INT step; ++ /* The growth step of the loop induction variable. */ ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map <basic_block, tree> defs_map; ++ ++/* Dump the bb information in a loop. */ ++ ++static void ++dump_loop_bb (struct loop *loop) ++{ ++ basic_block *body = get_loop_body_in_dom_order (loop); ++ basic_block bb = NULL; ++ ++ for (unsigned i = 0; i < loop->num_nodes; i++) ++ { ++ bb = bodyi; ++ if (bb->loop_father != loop) ++ { ++ continue; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "===== the %dth bb of loop ==========:\n", i); ++ gimple_dump_bb (dump_file, bb, 0, dump_flags); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ free (body); ++} ++ ++/* Return true if the loop has precisely one backedge. 
*/ ++ ++static bool ++loop_single_backedge_p (class loop *loop) ++{ ++ basic_block latch = loop->latch; ++ if (!single_succ_p (latch)) ++ return false; ++ ++ edge e = single_succ_edge (latch); ++ edge backedge = find_edge (latch, loop->header); ++ ++ if (e != backedge) ++ return false; ++ ++ return true; ++} ++ ++/* Return true if the loop has precisely one preheader BB. */ ++ ++static bool ++loop_single_preheader_bb (class loop *loop) ++{ ++ basic_block header = loop->header; ++ if (EDGE_COUNT (header->preds) != 2) ++ return false; ++ ++ edge e1 = EDGE_PRED (header, 0); ++ edge e2 = EDGE_PRED (header, 1); ++ ++ if ((e1->src == loop->latch && e2->src->loop_father != loop) ++ || (e2->src == loop->latch && e1->src->loop_father != loop)) ++ return true; ++ ++ return false; ++} ++ ++/* Initialize the origin_loop structure. */ ++static void ++init_origin_loop_structure () ++{ ++ origin_loop.base = NULL; ++ origin_loop.limit = NULL; ++ origin_loop.arr1 = NULL; ++ origin_loop.arr2 = NULL; ++ origin_loop.exit_e1 = NULL; ++ origin_loop.exit_e2 = NULL; ++ origin_loop.exit_bb1 = NULL; ++ origin_loop.exit_bb2 =NULL; ++ origin_loop.entry_edge = NULL; ++ origin_loop.cond_stmt1 = NULL; ++ origin_loop.cond_stmt2 = NULL; ++ origin_loop.update_stmt = NULL; ++ origin_loop.exist_prolog_assgin = false; ++ origin_loop.step = 0; ++} ++ ++/* Get the edge that first entered the loop. */ ++ ++static edge ++get_loop_preheader_edge (class loop *loop) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, loop->header->preds) ++ if (e->src != loop->latch) ++ break; ++ ++ if (!e) ++ { ++ gcc_assert (!loop_outer (loop)); ++ return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ } ++ ++ return e; ++} ++ ++/* Make sure the exit condition stmt satisfies a specific form. 
*/ ++ ++static bool ++check_cond_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ if (gimple_code (stmt) != GIMPLE_COND) ++ return false; ++ ++ if (gimple_cond_code (stmt) != NE_EXPR && gimple_cond_code (stmt) != EQ_EXPR) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ /* The parameter that does not support the cond statement is not SSA_NAME. ++ eg: if (len_1 != 100). */ ++ if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ return true; ++} ++ ++/* Record the exit information in the original loop including exit edge, ++ exit bb block, exit condition stmt, ++ eg: exit_eX origin_exit_bbX cond_stmtX. */ ++ ++static bool ++record_origin_loop_exit_info (class loop *loop) ++{ ++ bool found = false; ++ edge e = NULL; ++ unsigned i = 0; ++ gimple *stmt; ++ ++ if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL ++ || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL ++ || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL) ++ return false; ++ ++ vec<edge> exit_edges = get_loop_exit_edges (loop); ++ if (exit_edges == vNULL) ++ return false; ++ ++ if (exit_edges.length () != 2) ++ goto fail; ++ ++ FOR_EACH_VEC_ELT (exit_edges, i, e) ++ { ++ if (e->src == loop->header) ++ { ++ origin_loop.exit_e1 = e; ++ origin_loop.exit_bb1 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt1 = stmt; ++ } ++ else ++ { ++ origin_loop.exit_e2 = e; ++ origin_loop.exit_bb2 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt2 = stmt; ++ } ++ } ++ ++ if (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL ++ && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL ++ && origin_loop.cond_stmt1 != NULL && origin_loop.cond_stmt2 != NULL) ++ found = true; ++ ++fail: ++ exit_edges.release (); ++ return found; ++} ++ ++/* Returns true if t is 
SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. */ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. */ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ gimple *g = SSA_NAME_DEF_STMT (rhs); ++ ++ /* TODO: Currently, the input restrictions on lhs and rhs are implemented ++ through PARM_DECL. We may consider releasing the restrictions later, and ++ we need to consider the overall adaptation scenario and adding test ++ cases. */ ++ if (ssa_name_var_p (rhs) && TREE_CODE (SSA_NAME_VAR (rhs)) == PARM_DECL ++ && g && gimple_code (g) == GIMPLE_NOP ++ && (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) != PARM_DECL)) ++ { ++ origin_loop.limit = rhs; ++ } ++ else ++ return false; ++ ++ if (origin_loop.limit != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Returns true only when the expression on the rhs code of stmt is PLUS_EXPR, ++ rhs1 is SSA_NAME with the same var as origin_loop base, and rhs2 is ++ INTEGER_CST. 
*/ ++ ++static bool ++check_update_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ ++ if (gimple_assign_rhs_code (stmt) == PLUS_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST ++ && same_ssa_name_var_p (rhs1, origin_loop.base)) ++ { ++ origin_loop.step = tree_to_uhwi (rhs2); ++ if (origin_loop.step == 1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Get origin loop induction variable initial value. */ ++ ++static bool ++get_iv_base (gimple *stmt) ++{ ++ tree lhs = gimple_cond_lhs (stmt); ++ if (origin_loop.base != NULL || origin_loop.update_stmt != NULL) ++ return false; ++ ++ basic_block header = gimple_bb (stmt); ++ ++ gphi_iterator gsi; ++ edge e; ++ edge_iterator ei; ++ tree iv_after; ++ ++ for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, lhs)) ++ continue; ++ tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge); ++ if (!same_ssa_name_var_p (base, lhs)) ++ return false; ++ origin_loop.base = base; ++ FOR_EACH_EDGE (e, ei, header->preds) ++ { ++ if (e != origin_loop.entry_edge) ++ { ++ iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e); ++ gimple *update = SSA_NAME_DEF_STMT (iv_after); ++ if (!check_update_stmt (update)) ++ return false; ++ origin_loop.update_stmt = update; ++ if (gimple_bb (update) == header && iv_after == lhs) ++ origin_loop.exist_prolog_assgin = true; ++ } ++ } ++ } ++ ++ if (origin_loop.base != NULL && origin_loop.update_stmt != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Record the upper bound and initial value of the induction variable in the ++ original loop; When prolog_assign is present, make sure loop header is in ++ simple form; And the interpretation of prolog_assign is as follows: ++ eg: while (++len != limit) ++ ...... 
++ For such a loop, ++len will be processed before entering header_bb, and the ++ assign is regarded as the prolog_assign of the loop. */ ++ ++static bool ++record_origin_loop_header (class loop *loop) ++{ ++ basic_block header = loop->header; ++ ++ if (origin_loop.entry_edge != NULL || origin_loop.base != NULL ++ || origin_loop.update_stmt != NULL || origin_loop.limit != NULL) ++ return false; ++ origin_loop.entry_edge = get_loop_preheader_edge (loop); ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ ++ for (gsi = gsi_last_bb (header); !gsi_end_p (gsi); gsi_prev (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt && is_gimple_debug (stmt)) ++ continue; ++ if (stmt && gimple_code (stmt) == GIMPLE_COND) ++ { ++ if (!get_iv_upper_bound (stmt)) ++ return false; ++ if (!get_iv_base (stmt)) ++ return false; ++ } ++ else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) ++ { ++ if (stmt != origin_loop.update_stmt || !origin_loop.exist_prolog_assgin) ++ return false; ++ } ++ else ++ return false; ++ } ++ ++ if (origin_loop.entry_edge != NULL && origin_loop.base != NULL ++ && origin_loop.update_stmt != NULL && origin_loop.limit != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* When prolog_assign does not exist, make sure that update_stmt exists in the ++ loop latch, and its form is a specific form, eg: ++ len_2 = len_1 + 1. */ ++ ++static bool ++record_origin_loop_latch (class loop *loop) ++{ ++ basic_block latch = loop->latch; ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ ++ gsi = gsi_start_bb (latch); ++ ++ if (origin_loop.exist_prolog_assgin) ++ { ++ if (gsi_end_p (gsi)) ++ return true; ++ } ++ else ++ { ++ if (gsi_one_before_end_p (gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == origin_loop.update_stmt) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Returns true when the DEF_STMT corresponding to arg0 of the mem_ref tree ++ satisfies the POINTER_PLUS_EXPR type. 
*/ ++ ++static bool ++check_body_mem_ref (tree mem_ref) ++{ ++ tree arg0 = TREE_OPERAND (mem_ref , 0); ++ tree arg1 = TREE_OPERAND (mem_ref , 1); ++ ++ if (TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE ++ && TREE_CODE (arg1) == INTEGER_CST ++ && tree_to_uhwi (arg1) == 0) ++ { ++ gimple *tmp_g = SSA_NAME_DEF_STMT (arg0); ++ if (tmp_g && gimple_assign_rhs_code (tmp_g) == POINTER_PLUS_EXPR) ++ return true; ++ } ++ return false; ++} ++ ++/* Returns true if the rh2 of the current stmt comes from the base in the ++ original loop. */ ++ ++static bool ++check_body_pointer_plus (gimple *stmt, tree &tmp_index) ++{ ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (TREE_TYPE (rhs1)) == POINTER_TYPE) ++ { ++ gimple *g = SSA_NAME_DEF_STMT (rhs2); ++ if (g && gimple_assign_rhs_code (g) == NOP_EXPR) ++ { ++ tree nop_rhs = gimple_assign_rhs1 (g); ++ if (same_ssa_name_var_p (nop_rhs, origin_loop.base)) ++ { ++ if (!origin_loop.arr1) ++ { ++ origin_loop.arr1 = rhs1; ++ tmp_index = rhs2; ++ } ++ else if (!origin_loop.arr2) ++ { ++ origin_loop.arr2 = rhs1; ++ if (tmp_index != rhs2) ++ return false; ++ } ++ else ++ return false; ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ ++/* Record the array comparison information in the original loop, while ensuring ++ that there are only statements related to cont_stmt in the loop body. 
*/ ++ ++static bool ++record_origin_loop_body (class loop *loop) ++{ ++ basic_block body = gimple_bb (origin_loop.cond_stmt2); ++ ++ if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL) ++ return false; ++ ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple_set_visited (gsi_stmt (gsi), false); ++ } ++ ++ tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2); ++ tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2); ++ if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE) ++ return false; ++ ++ auto_vec<tree> stack; ++ tree tmp_index = NULL; ++ stack.safe_push (cond_lhs); ++ stack.safe_push (cond_rhs); ++ gimple_set_visited (origin_loop.cond_stmt2, true); ++ ++ while (!stack.is_empty ()) ++ { ++ tree op = stack.pop (); ++ gimple *g = SSA_NAME_DEF_STMT (op); ++ if (!g || gimple_bb (g) != body || !is_gimple_assign (g)) ++ continue; ++ gimple_set_visited (g, true); ++ if (gimple_assign_rhs_code (g) == MEM_REF) ++ { ++ tree mem_ref = gimple_assign_rhs1 (g); ++ if (!check_body_mem_ref (mem_ref)) ++ return false; ++ stack.safe_push (TREE_OPERAND (mem_ref , 0)); ++ } ++ else if (gimple_assign_rhs_code (g) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (g); ++ if (!check_body_pointer_plus (g, tmp_index)) ++ return false; ++ stack.safe_push (rhs2); ++ } ++ else if (gimple_assign_rhs_code (g) == NOP_EXPR) ++ { ++ tree rhs = gimple_assign_rhs1 (g); ++ if (!same_ssa_name_var_p (rhs, origin_loop.base)) ++ return false; ++ stack.safe_push (rhs); ++ } ++ else ++ return false; ++ } ++ bool allvisited = true; ++ for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ if (!gimple_visited_p (gsi_stmt (gsi)) ++ && !is_gimple_debug (gsi_stmt (gsi))) ++ allvisited = false; ++ } ++ if (allvisited) ++ { ++ if (origin_loop.arr1 != NULL && origin_loop.arr2 != NULL) ++ return true; ++ } ++ return false; ++} ++ ++/* Dump the original 
loop information to see if the origin loop ++ form matches. */ ++ ++static void ++dump_origin_loop_info () ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nThe origin loop info:\n"); ++ fprintf (dump_file, "\n the origin_loop.limit is:\n"); ++ print_node (dump_file, "", origin_loop.limit, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.base is:\n"); ++ print_node (dump_file, "", origin_loop.base, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr1 is:\n"); ++ print_node (dump_file, "", origin_loop.arr1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr2 is:\n"); ++ print_node (dump_file, "", origin_loop.arr2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt1 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt2 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.update_stmt is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.update_stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Returns true only if the exit bb of the original loop is unique and its phi ++ node parameter comes from the same variable. 
*/ ++ ++static bool ++check_exit_bb (class loop *loop) ++{ ++ if (origin_loop.exit_bb1 != origin_loop.exit_bb2 ++ || flow_bb_inside_loop_p (loop, origin_loop.exit_bb1)) ++ return false; ++ ++ gphi_iterator gsi; ++ for (gsi = gsi_start_phis (origin_loop.exit_bb1); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, origin_loop.base)) ++ continue; ++ if (gimple_phi_num_args (phi) == 2) ++ { ++ tree arg0 = gimple_phi_arg_def (phi, 0); ++ tree arg1 = gimple_phi_arg_def (phi, 1); ++ if (arg0 == arg1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Make sure that the recorded origin_loop information meets the ++ relative requirements. */ ++ ++static bool ++check_origin_loop_info (class loop *loop) ++{ ++ dump_origin_loop_info (); ++ tree arr1_elem_size, arr2_elem_size; ++ ++ if (!check_exit_bb (loop)) ++ return false; ++ ++ if (TREE_CODE (origin_loop.base) != SSA_NAME) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (origin_loop.limit))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr1)))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr2)))) ++ return false; ++ ++ if (TREE_CODE (TREE_TYPE (origin_loop.arr1)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (origin_loop.arr2)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))) != INTEGER_TYPE) ++ return false; ++ ++ arr1_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))); ++ arr2_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))); ++ ++ if (tree_to_uhwi (arr1_elem_size) != 8 || tree_to_uhwi (arr2_elem_size) != 8) ++ return false; ++ ++ return true; ++} ++ ++/* Record the useful information of the original loop and judge whether the ++ information meets the specified conditions. 
*/ ++ ++static bool ++check_record_loop_form (class loop *loop) ++{ ++ if (!record_origin_loop_exit_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop exit information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_header (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop header information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_latch (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop latch information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_body (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop body information.\n"); ++ } ++ return false; ++ } ++ ++ if (!check_origin_loop_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to check origin loop information.\n"); ++ } ++ return false; ++ } ++ ++ return true; ++} ++ ++/* The main entry for judging whether the loop meets some conditions. */ ++ ++static bool ++determine_loop_form (class loop *loop) ++{ ++ /* Currently only standard loops are processed, that is, only loop_header, ++ loop_latch, loop_body 3 bb blocks are included. */ ++ if (loop->inner || loop->num_nodes != 3) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, there is inner loop or" ++ "redundant bb.\n"); ++ } ++ return false; ++ } ++ ++ if (single_exit (loop) || !loop->latch) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch" ++ "does not exist.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one backedge. 
*/ ++ if (!loop_single_backedge_p (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop back edges are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one preheader BB. */ ++ if (!loop_single_preheader_bb (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop preheader bb are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ init_origin_loop_structure (); ++ if (!check_record_loop_form (loop)) ++ return false; ++ ++ return true; ++} ++ ++/* Create prolog bb for newly constructed loop; When prolog_assign exists in ++ the original loop, the corresponding assign needs to be added to prolog_bb; ++ eg: <bb 7> ++ len_16 = len_10 + 1 ++ Create simple copy statement when prolog_assign does not exist; ++ eg: <bb 7> ++ len_16 = len_10 ++ ++ The IR of bb is as above. */ ++ ++static void ++create_prolog_bb (basic_block &prolog_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree lhs1; ++ ++ prolog_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (prolog_bb, outer); ++ redirect_edge_and_branch (entry_edge, prolog_bb); ++ set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb); ++ gsi = gsi_last_bb (prolog_bb); ++ lhs1 = copy_ssa_name (origin_loop.base); ++ ++ if (origin_loop.exist_prolog_assgin) ++ g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base, ++ build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step)); ++ else ++ g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ set_current_def (origin_loop.base, lhs1); ++ defs_map.put (prolog_bb, lhs1); ++} ++ ++/* Create preheader bb for new loop; In order to ensure the standard form of ++ the loop, add a preheader_bb before 
loop_header. */ ++ ++static void ++create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ loop_pred_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (loop_pred_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, loop_pred_bb, dominator_bb); ++ defs_map.put (loop_pred_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Add phi_arg for bb with phi node. */ ++ ++static void ++rewrite_add_phi_arg (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ gphi *phi; ++ gphi_iterator gsi; ++ tree res; ++ location_t loc; ++ ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ phi = gsi.phi (); ++ res = gimple_phi_result (phi); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (PHI_ARG_DEF_FROM_EDGE (phi, e)) ++ continue; ++ tree var = *(defs_map.get (e->src)); ++ if (!same_ssa_name_var_p (var, res)) ++ continue; ++ if (virtual_operand_p (var)) ++ loc = UNKNOWN_LOCATION; ++ else ++ loc = gimple_location (SSA_NAME_DEF_STMT (var)); ++ add_phi_arg (phi, var, e, loc); ++ } ++ } ++} ++ ++/* Create loop_header BB for align_loop. ++ eg: <bb 9> ++ _18 = (long unsigned int) len_17; ++ _19 = _18 + 8; ++ _20 = (long unsigned int) len_limit_12 (D); ++ if (_19 <= _20) ++ ++ The IR of bb is as above. 
*/
++
++static void
++create_align_loop_header (basic_block &align_loop_header, basic_block after_bb,
++			  basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gcond *cond_stmt;
++  gphi *phi;
++  tree res;
++
++  tree entry_node = get_current_def (origin_loop.base);
++  align_loop_header = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_header, outer);
++  make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb);
++  gsi = gsi_last_bb (align_loop_header);
++  phi = create_phi_node (NULL_TREE, align_loop_header);
++  create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi));
++  res = gimple_phi_result (phi);
++
++  tree lhs1 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, res);
++  tree lhs2 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (lhs1), lhs1,
++			    build_int_cst (TREE_TYPE (lhs1), 8));
++  tree lhs3 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node,
++			    origin_loop.limit);
++  cond_stmt = gimple_build_cond (LE_EXPR, lhs2, lhs3, NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++
++  set_current_def (origin_loop.base, res);
++  defs_map.put (align_loop_header, res);
++}
++
++/* Create loop body BB for align_loop.
++   eg: <bb 10>
++   _21 = (sizetype) len_17;
++   _22 = cur_15 (D) + _21;
++   _23 = MEM[(long unsigned int *)_22];
++   _24 = pb_13 (D) + _21;
++   _25 = MEM[(long unsigned int *)_24];
++   if (_23 != _25)
++
++   The IR of bb is as above.
*/ ++ ++static void ++create_align_loop_body_bb (basic_block &align_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2; ++ ++ align_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_body_bb, outer); ++ make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_body_bb); ++ ++ tree var = gimple_build (&stmts, NOP_EXPR, sizetype, ++ get_current_def (origin_loop.base)); ++ lhs1 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs1, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_assign_lhs (g); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs2, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), ++ lhs1, lhs2, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* Create loop_latch BB for align_loop. ++ eg: <bb 11> ++ len_26 = len_17 + 8; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_latch, outer); ++ make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (align_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), 8)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_latch, res); ++} ++ ++/* Create a new loop and add it to outer_loop and return. */ ++ ++static class loop * ++init_new_loop (class loop *outer_loop, basic_block header, basic_block latch) ++{ ++ class loop *new_loop; ++ new_loop = alloc_loop (); ++ new_loop->header = header; ++ new_loop->latch = latch; ++ add_loop (new_loop, outer_loop); ++ ++ return new_loop; ++} ++ ++/* Create necessary exit BB for align_loop. ++ eg: <bb 12> ++ _27 = _23 ^ _25; ++ _28 = __builtin_ctzll (_27); ++ _29 = _28 >> 3; ++ len_30 = _29 + len_17; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_align_loop_exit_bb (basic_block &align_loop_exit_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gimple *cond_stmt; ++ tree lhs1, lhs2; ++ tree cond_lhs, cond_rhs; ++ gcall *build_ctzll; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_exit_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_exit_bb, outer); ++ make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_exit_bb); ++ ++ cond_stmt = gsi_stmt (gsi_last_bb (after_bb)); ++ cond_lhs = gimple_cond_lhs (cond_stmt); ++ cond_rhs = gimple_cond_rhs (cond_stmt); ++ ++ lhs1 = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (cond_lhs), cond_lhs, ++ cond_rhs); ++ build_ctzll = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1, ++ lhs1); ++ lhs1 = make_ssa_name (integer_type_node); ++ gimple_call_set_lhs (build_ctzll, lhs1); ++ gimple_seq_add_stmt (&stmts, build_ctzll); ++ lhs2 = copy_ssa_name (lhs1); ++ g = gimple_build_assign (lhs2, RSHIFT_EXPR, lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 3)); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (entry_node), lhs2); ++ lhs2 = copy_ssa_name (entry_node); ++ g = gimple_build_assign (lhs2, PLUS_EXPR, lhs1, entry_node); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_exit_bb, lhs2); ++} ++ ++/* Create loop_header BB for epilogue_loop. ++ eg: <bb 14> ++ # len_31 = PHI <len_17 (13), len_37 (16)> ++ if (len_31 != len_limit_12 (D)) ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_header (basic_block &epilogue_loop_header, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ tree res; ++ gphi *phi; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_header, outer); ++ make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_header); ++ phi = create_phi_node (NULL_TREE, epilogue_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), res, ++ origin_loop.limit, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.base, res); ++ defs_map.put (epilogue_loop_header, res); ++} ++ ++/* Create loop body BB for epilogue_loop. ++ eg: <bb 15> ++ _32 = (sizetype) len_31; ++ _33 = pb_13 (D) + _32; ++ _34 = *_33; ++ _35 = cur_15 (D) + _32; ++ _36 = *_35; ++ if (_34 != _36) ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2, lhs3; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_body_bb, outer); ++ make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_body_bb); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, sizetype, entry_node); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs2, ++ build_int_cst (TREE_TYPE (lhs2), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ lhs3 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs3, ++ build_int_cst (TREE_TYPE (lhs3), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs3 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), lhs2, ++ lhs3, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Create loop_latch BB for epilogue_loop. ++ eg: <bb 16> ++ len_37 = len_31 + 1; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_latch (basic_block &epilogue_loop_latch, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_latch, outer); ++ make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), origin_loop.step)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_latch, res); ++} ++ ++/* convert_to_new_loop ++ | | ++ | | ++ | | entry_edge ++ | ______ | ++ | / V V ++ | | -----origin_loop_header--- ++ | | | | ++ | | -------------------------\ ++ | | | \ ++ | | V \___ ___ ___ ___ ___ ___ ___ ++ | | -----origin_loop_body----- | ++ | | | | | ++ | | -------------------------\ | ++ | | | \___ ___ ___ ___ | ++ | | V V V ++ | | -----origin_loop_latch---- -----exit_bb------ ++ | | | | | | ++ | | /-------------------------- ------------------ ++ | \ __ / ++ | ++ | | ++ | ====> |entry_edge ++ | V ++ | -------prolog_bb----- ++ | | | ++ | --------------------- ++ | | ++ | V ++ | -----align_loop_header---- ++ | /-----------------> | | ++ |/ -------------------------- ++ || / \ ++ || V V ++ || ---align_loop_body--- ---epilogue_loop_header-- ++ || | | -------| |<---| ++ || --------------------\ / ------------------------- | ++ || | \____ | | | ++ || V | | V | ++ || ---align_loop_latch--- | | ---epilogue_loop_body---- | ++ || | | | | ----| | | ++ || ---------------------- | | / ------------------------- | ++ || / __________/ | | | | ++ || / | | | V | ++ | \ __________/ | | | 
---epilogue_loop_latch--- | ++ | | | | | | | ++ | | | | ------------------------- / ++ | V | | | / ++ | -align_loop_exit_bb- | | \______________/ ++ | | | | | ++ | -------------------- | | ++ | | | | ++ | | V V ++ | | -----exit_bb------ ++ | |---->| | ++ | ------------------ ++ ++ The origin_loop conversion process starts from entry_edge and ends at ++ exit_bb; The execution logic of origin_loop is completely replaced by ++ align_loop + epilogue_loop: ++ 1) align_loop mainly implements the idea of using wide-type dereference ++ and comparison on array elements, so as to achieve the effect of ++ acceleration; For the corresponding source code understanding, please ++ refer to the description of the pass at the beginning; ++ 2) epilogue_loop processes the previous loop remaining array element ++ comparison. */ ++ ++static void ++create_new_loops (edge entry_edge) ++{ ++ basic_block prolog_bb; ++ basic_block align_loop_header, align_loop_latch, align_loop_body_bb; ++ basic_block align_pred_bb, align_loop_exit_bb; ++ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb; ++ basic_block epilogue_loop_pred_bb; ++ class loop *align_loop; ++ class loop *epilogue_loop; ++ ++ class loop *outer = entry_edge->src->loop_father; ++ ++ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer, ++ entry_edge); ++ ++ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer); ++ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU); ++ ++ create_align_loop_header (align_loop_header, align_pred_bb, ++ align_pred_bb, outer); ++ ++ create_align_loop_body_bb (align_loop_body_bb, align_loop_header, ++ align_loop_header, outer); ++ ++ create_align_loop_latch (align_loop_latch, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); ++ rewrite_add_phi_arg (align_loop_header); ++ ++ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch); ++ if (dump_file && 
(dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num); ++ flow_loop_dump (align_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ ++ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header, ++ align_loop_header, outer); ++ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); ++ ++ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb, ++ epilogue_loop_pred_bb, outer); ++ ++ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header, ++ epilogue_loop_header, outer); ++ ++ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb, ++ epilogue_loop_body_bb, outer); ++ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header, ++ EDGE_FALLTHRU); ++ rewrite_add_phi_arg (epilogue_loop_header); ++ ++ epilogue_loop = init_new_loop (outer, epilogue_loop_header, ++ epilogue_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num); ++ flow_loop_dump (epilogue_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1, ++ EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1, ++ entry_edge->src); ++ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE); ++ ++ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2, ++ entry_edge->src); ++ ++ rewrite_add_phi_arg (origin_loop.exit_bb1); ++ rewrite_add_phi_arg (origin_loop.exit_bb2); ++ ++ remove_edge (origin_loop.exit_e1); ++ remove_edge (origin_loop.exit_e2); ++} ++ ++/* Make sure that the dominance relationship of the newly inserted cfg ++ is not missing. 
*/
++
++static void
++update_loop_dominator (cdi_direction dir)
++{
++  gcc_assert (dom_info_available_p (dir));
++
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      basic_block imm_bb = get_immediate_dominator (dir, bb);
++      if (!imm_bb || bb == origin_loop.exit_bb1)
++	{
++	  set_immediate_dominator (CDI_DOMINATORS, bb,
++				   recompute_dominator (CDI_DOMINATORS, bb));
++	  continue;
++	}
++    }
++}
++
++/* Clear information about the original loop. */
++
++static void
++remove_origin_loop (class loop *loop)
++{
++  basic_block *body;
++
++  body = get_loop_body_in_dom_order (loop);
++  unsigned n = loop->num_nodes;
++  for (unsigned i = 0; i < n; i++)
++    {
++      delete_basic_block (body[i]);
++    }
++  free (body);
++  delete_loop (loop);
++}
++
++/* Perform the conversion of origin_loop to new_loop. */
++
++static void
++convert_to_new_loop (class loop *loop)
++{
++  create_new_loops (origin_loop.entry_edge);
++  remove_origin_loop (loop);
++  update_loop_dominator (CDI_DOMINATORS);
++  update_ssa (TODO_update_ssa);
++}
++
++/* The main entry of array-widen-compare optimizes.
*/ ++ ++static unsigned int ++tree_ssa_array_widen_compare () ++{ ++ unsigned int todo = 0; ++ class loop *loop; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ flow_loops_dump (dump_file, NULL, 1); ++ fprintf (dump_file, "\nConfirm which loop can be optimized using" ++ " array-widen-compare\n"); ++ } ++ ++ enum li_flags LI = LI_FROM_INNERMOST; ++ for (auto loop : loops_list (cfun, LI)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (determine_loop_form (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ dump_loop_bb (loop); ++ } ++ ++ convert_to_new_loop (loop); ++ } ++ } ++ ++ todo |= (TODO_update_ssa); ++ return todo; ++} ++ ++/* Array widen compare. */ ++ ++namespace { ++ ++const pass_data pass_data_tree_array_widen_compare = ++{ ++ GIMPLE_PASS, ++ "awiden_compare", ++ OPTGROUP_LOOP, ++ TV_TREE_ARRAY_WIDEN_COMPARE, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_array_widen_compare : public gimple_opt_pass ++{ ++public: ++ pass_array_widen_compare (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++ ++}; // class pass_array_widen_compare ++ ++bool ++pass_array_widen_compare::gate (function *) ++{ ++ return (flag_array_widen_compare > 0 && optimize >= 3); ++} ++ ++unsigned int ++pass_array_widen_compare::execute (function *fun) ++{ ++ if (number_of_loops (fun) <= 1) ++ return 0; ++ ++ /* Only supports LP64 data mode. 
*/ ++ if (TYPE_PRECISION (long_integer_type_node) != 64 ++ || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "The current data mode is not supported," ++ "only the LP64 date mode is supported.\n"); ++ return 0; ++ } ++ ++ return tree_ssa_array_widen_compare (); ++} ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_array_widen_compare (gcc::context *ctxt) ++{ ++ return new pass_array_widen_compare (ctxt); ++} +-- +2.33.0 +
View file
_service:tar_scm:0015-Backport-Structure-reorganization-optimization.patch
Added
@@ -0,0 +1,6170 @@ +From 8631d4a39453bb262675bea9abb5c1b7d52af624 Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Wed, 19 Jul 2023 10:28:04 +0800 +Subject: PATCH 15/22 Backport Structure reorganization optimization + +Reference: https://gcc.gnu.org/git/?p=gcc-old.git;a=commit;h=6e1bd1c900533c627b5e4fbbecb41dcd7974b522 + +Introduce structure reorganization optimization, that change C-like +structures layout in order to better utilize spatial locality. This +transformation is affective for programs containing arrays of structures. +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 4 +- + gcc/configure | 2 +- + gcc/configure.ac | 2 +- + gcc/doc/invoke.texi | 23 + + gcc/gimple-ssa-warn-access.cc | 8 + + gcc/ipa-param-manipulation.cc | 3 +- + gcc/ipa-param-manipulation.h | 3 +- + gcc/ipa-struct-reorg/escapes.def | 60 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 4015 +++++++++++++++++ + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 235 + + gcc/params.opt | 4 + + gcc/passes.def | 2 + + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 35 + + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 24 + + gcc/testsuite/gcc.dg/struct/struct_reorg-2.c | 29 + + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 23 + + gcc/testsuite/gcc.dg/struct/struct_reorg-4.c | 59 + + .../gcc.dg/struct/w_prof_global_array.c | 29 + + .../gcc.dg/struct/w_prof_global_var.c | 42 + + .../gcc.dg/struct/w_prof_local_array.c | 37 + + .../gcc.dg/struct/w_prof_local_var.c | 40 + + .../gcc.dg/struct/w_prof_single_str_global.c | 31 + + gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c | 64 + + .../gcc.dg/struct/w_ratio_cold_str.c | 43 + + .../gcc.dg/struct/wo_prof_array_field.c | 26 + + .../struct/wo_prof_array_through_pointer.c | 38 + + .../gcc.dg/struct/wo_prof_double_malloc.c | 29 + + .../gcc.dg/struct/wo_prof_empty_str.c | 44 + + .../struct/wo_prof_escape_arg_to_local.c | 44 + + .../gcc.dg/struct/wo_prof_escape_return-1.c | 33 + + .../gcc.dg/struct/wo_prof_escape_return.c | 32 + + 
.../gcc.dg/struct/wo_prof_escape_str_init.c | 31 + + .../struct/wo_prof_escape_substr_array.c | 33 + + .../struct/wo_prof_escape_substr_pointer.c | 48 + + .../struct/wo_prof_escape_substr_value.c | 45 + + .../gcc.dg/struct/wo_prof_global_array.c | 32 + + .../gcc.dg/struct/wo_prof_global_var.c | 45 + + .../gcc.dg/struct/wo_prof_local_array.c | 40 + + .../gcc.dg/struct/wo_prof_local_var.c | 43 + + .../gcc.dg/struct/wo_prof_malloc_size_var-1.c | 47 + + .../gcc.dg/struct/wo_prof_malloc_size_var.c | 47 + + .../struct/wo_prof_mult_field_peeling.c | 42 + + .../gcc.dg/struct/wo_prof_single_str_global.c | 34 + + .../gcc.dg/struct/wo_prof_single_str_local.c | 34 + + .../struct/wo_prof_single_str_pointer.c | 38 + + .../gcc.dg/struct/wo_prof_two_strs.c | 67 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + 49 files changed, 5686 insertions(+), 6 deletions(-) + create mode 100644 gcc/ipa-struct-reorg/escapes.def + create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.cc + create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.h + create mode 100644 gcc/testsuite/gcc.dg/struct/struct-reorg.exp + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-2.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-3.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-4.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c + create mode 100644 
gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..c863ad992 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1451,6 +1451,7 @@ OBJS = \ + incpath.o \ + init-regs.o \ + internal-fn.o \ ++ ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ + ipa-sra.o \ + ipa-devirt.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index e365a48bc..b48fa3228 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1950,8 +1950,8 @@ 
Common Ignore + Does nothing. Preserved for backward compatibility. + + fipa-struct-reorg +-Common Ignore +-Does nothing. Preserved for backward compatibility. ++Common Var(flag_ipa_struct_reorg) Init(0) Optimization ++Perform structure layout optimizations. + + fipa-vrp + Common Var(flag_ipa_vrp) Optimization +diff --git a/gcc/configure b/gcc/configure +index c749ace01..98bbf0f85 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -34191,7 +34191,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;} + "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;; + "gccdepdir":C) + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common analyzer rtl-ssa ++ for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 992a50e7b..c74f4b555 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -1340,7 +1340,7 @@ AC_CHECK_HEADERS(ext/hash_map) + ZW_CREATE_DEPDIR + AC_CONFIG_COMMANDS(gccdepdir, + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common analyzer rtl-ssa ++ for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done, subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR) +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..e37bae5b1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol + -flive-patching=@var{level} @gol +@@ -11886,6 +11887,19 @@ higher. + Discover which functions are pure or constant. + Enabled by default at @option{-O1} and higher. + ++@item -fipa-struct-reorg ++@opindex fipa-struct-reorg ++Perform structure reorganization optimization, that change C-like structures ++layout in order to better utilize spatial locality. This transformation is ++affective for programs containing arrays of structures. Available in two ++compilation modes: profile-based (enabled with @option{-fprofile-generate}) ++or static (which uses built-in heuristics). It works only in whole program ++mode, so it requires @option{-fwhole-program} to be ++enabled. Structures considered @samp{cold} by this transformation are not ++affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}). ++ ++With this flag, the program debug info reflects a new structure layout. ++ + @item -fipa-reference + @opindex fipa-reference + Discover which static variables do not escape the +@@ -13772,6 +13786,15 @@ In each case, the @var{value} is an integer. The following choices + of @var{name} are recognized for all targets: + + @table @gcctabopt ++@item struct-reorg-cold-struct-ratio ++The threshold ratio (as a percentage) between a structure frequency ++and the frequency of the hottest structure in the program. This parameter ++is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}. 
++We say that if the ratio of a structure frequency, calculated by profiling, ++to the hottest structure frequency in the program is less than this ++parameter, then structure reorganization is not applied to this structure. ++The default is 10. ++ + @item predictable-branch-outcome + When branch is predicted to be taken with probability lower than this threshold + (in percent), then it is considered well predictable. +diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc +index 8d088ad33..a24645783 100644 +--- a/gcc/gimple-ssa-warn-access.cc ++++ b/gcc/gimple-ssa-warn-access.cc +@@ -2193,6 +2193,14 @@ pass_waccess::set_pass_param (unsigned int n, bool early) + bool + pass_waccess::gate (function *) + { ++ /* FIXME: In structure optimizations, some statements will be ++ rewritten and removed from the BB, leaving some unused SSA. ++ In pass waccess, it will traverse all SSA and cause ICE ++ when handling these unused SSA. So temporarily disable ++ pass waccess when enable structure optimizations. */ ++ if (flag_ipa_struct_reorg) ++ return false; ++ + return (warn_free_nonheap_object + || warn_mismatched_alloc + || warn_mismatched_new_delete); +diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc +index 38328c3e8..f9e956008 100644 +--- a/gcc/ipa-param-manipulation.cc ++++ b/gcc/ipa-param-manipulation.cc +@@ -55,7 +55,8 @@ static const char *ipa_param_prefixesIPA_PARAM_PREFIX_COUNT + = {"SYNTH", + "ISRA", + "simd", +- "mask"}; ++ "mask", ++ "struct_reorg"}; + + /* Names of parameters for dumping. Keep in sync with enum ipa_parm_op. 
*/ + +diff --git a/gcc/ipa-param-manipulation.h b/gcc/ipa-param-manipulation.h +index a9ad2b216..71f4a0a2f 100644 +--- a/gcc/ipa-param-manipulation.h ++++ b/gcc/ipa-param-manipulation.h +@@ -126,6 +126,7 @@ enum ipa_param_name_prefix_indices + IPA_PARAM_PREFIX_ISRA, + IPA_PARAM_PREFIX_SIMD, + IPA_PARAM_PREFIX_MASK, ++ IPA_PARAM_PREFIX_REORG, + IPA_PARAM_PREFIX_COUNT + }; + +@@ -189,7 +190,7 @@ struct GTY(()) ipa_adjusted_param + + /* Index into ipa_param_prefixes specifying a prefix to be used with + DECL_NAMEs of newly synthesized parameters. */ +- unsigned param_prefix_index : 2; ++ unsigned param_prefix_index : 3; + + /* Storage order of the original parameter (for the cases when the new + parameter is a component of an original one). */ +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +new file mode 100644 +index 000000000..c4c8e0739 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -0,0 +1,60 @@ ++/* Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* Before including this file, you should define a macro: ++ DEF_ESCAPE (ENUM, TEXT) ++ ++ This macro will be called once for each escape reason. The ++ ENUM will be of type "escape_type". The TEXT is describing ++ the reason for the escape. 
++*/ ++DEF_ESCAPE (escape_marked_as_used, "Type used in variable marked as used") ++DEF_ESCAPE (escape_via_global_var, "Type used via a external visible variable") ++DEF_ESCAPE (escape_via_global_init, "Type used via a global init of a variable") ++DEF_ESCAPE (escape_non_supported_allocator, "Type used by allocation which is not currently supported") ++DEF_ESCAPE (escape_dependent_type_escapes, "Type uses a type which escapes or is used by a type which escapes") ++DEF_ESCAPE (escape_var_arg_function, "Types escapes via a variable argument function") ++DEF_ESCAPE (escape_bitfields, "Types has bitfields") ++DEF_ESCAPE (escape_recusive_type, "Type has a recusive relationship") ++DEF_ESCAPE (escape_variable_sized_array, "Type has a variable sized type") ++DEF_ESCAPE (escape_external_function, "Type escapes via an external function call") ++DEF_ESCAPE (escape_visible_function, "Type escapes via expternally visible function call") ++DEF_ESCAPE (escape_pointer_function, "Type escapes via an function pointer call") ++DEF_ESCAPE (escape_unkown_field, "Type escapes via an unkown field accessed") ++DEF_ESCAPE (escape_union, "Type escapes via an union") ++DEF_ESCAPE (escape_inline_asm, "Type escapes via inline-asm") ++DEF_ESCAPE (escape_non_multiply_size, "Type escapes a pointer plus which is not a multiplicate of the size") ++DEF_ESCAPE (escape_cast_void, "Type escapes a cast to/from void*") ++DEF_ESCAPE (escape_cast_another_ptr, "Type escapes a cast to a different pointer") ++DEF_ESCAPE (escape_cast_int, "Type escapes a cast from/to intergral type") ++DEF_ESCAPE (escape_int_const, "Type escapes via integer constant") ++DEF_ESCAPE (escape_vce, "Type escapes via a VIEW_CONVERT_EXPR") ++DEF_ESCAPE (escape_array_access, "Type escapes via an array access") ++DEF_ESCAPE (escape_noclonable_function, "Type escapes via a non-clonable function") ++DEF_ESCAPE (escape_rescusive_type, "Recusive type") ++DEF_ESCAPE (escape_user_alignment, "Type has an user alignment set") ++DEF_ESCAPE 
(escape_volatile, "Type has an variable which is volatile") ++DEF_ESCAPE (escape_non_eq, "Type has a comparison other than equals or not equals") ++DEF_ESCAPE (escape_addr, "Type escapes via taking the address of field") ++DEF_ESCAPE (escape_cannot_change_signature, "Type used in a call that cannot change signature") ++DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct reorg") ++DEF_ESCAPE (escape_array, "Type is used in an array not handled yet") ++DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet") ++DEF_ESCAPE (escape_return, "Type escapes via a return not handled yet") ++ ++#undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +new file mode 100644 +index 000000000..238530860 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -0,0 +1,4015 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski <apinski@cavium.com> ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* This pass implements the structure reorganization organization ++ (struct-reorg). 
++ ++ Right now it handles just splitting off the hottest fields for a struct ++ of 2 fields: ++ struct s { ++ type1 field1; // Hot field ++ type2 field2; ++ }; ++ s *v; ++ into: ++ struct s_hot { ++ type1 field1; ++ }; ++ struct c_cold { ++ type2 field2; ++ }; ++ s_hot *v_hot; ++ s_cold *v_cold; ++ ++ TODO: This pass can be extended to more fields, and other alogrothims ++ like reordering. ++ ++ This pass operate in four stages: ++ 1. All of the field accesses, declarations (struct types and pointers ++ to that type) and struct types are scanned and recorded. This includes ++ global declarations. Also record all allocation and freeing sites; ++ this is needed for the rewriting phase. ++ ++ FIXME: If there is a top-level inline-asm, the pass immediately returns. ++ ++ 2. Prune out the types which are considered escaping. ++ Examples of types which are considered escaping: ++ a. A declaration has been marked as having the attribute used or ++ has user defined alignment (type too). ++ b. Accesses are via a BIT_FIELD_REF. ++ FIXME: Handle VECTOR_TYPE for this case. ++ c. The "allocation" site is not a known builtin function. ++ d. Casting to/from an integer. ++ ++ 3. Analyze the types for which optimization to do. ++ a. Split the fields into two different structs. ++ (FIXME: two field case handled only) ++ Look at all structs which contain two fields, if one of the fields ++ is hotter then split it and put it on the rewritting for accesses. ++ Allocations and freeing are marked to split into two functions; ++ all uses of that type will now be considered as two. ++ b. Reorder fields hottest to the coldest. TODO: Implement. ++ ++ 4. Rewrite each access and allocation and free whichis marked as ++ rewriting. 
++ ++*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "tree-pass.h" ++#include "cgraph.h" ++#include "diagnostic-core.h" ++#include "function.h" ++#include "basic-block.h" ++#include "gimple.h" ++#include "vec.h" ++#include "tree-pretty-print.h" ++#include "gimple-pretty-print.h" ++#include "gimple-iterator.h" ++#include "cfg.h" ++#include "ssa.h" ++#include "tree-dfa.h" ++#include "fold-const.h" ++#include "tree-inline.h" ++#include "stor-layout.h" ++#include "tree-into-ssa.h" ++#include "tree-cfg.h" ++#include "alloc-pool.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-struct-reorg.h" ++#include "tree-eh.h" ++#include "bitmap.h" ++#include "tree-ssa-live.h" /* For remove_unused_locals. */ ++#include "ipa-param-manipulation.h" ++#include "gimplify-me.h" ++ ++namespace { ++ ++using namespace struct_reorg; ++ ++#define VOID_POINTER_P(type) \ ++ (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) ++ ++/* Return true iff TYPE is stdarg va_list type. */ ++ ++static inline bool ++is_va_list_type (tree type) ++{ ++ return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); ++} ++ ++static const char * ++get_type_name (tree type) ++{ ++ const char *tname = NULL; ++ ++ if (type == NULL) ++ return NULL; ++ ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ return tname; ++} ++ ++/* Return the inner most type for arrays and pointers of TYPE. */ ++ ++static tree ++inner_type (tree type) ++{ ++ while (POINTER_TYPE_P (type) ++ || TREE_CODE (type) == ARRAY_TYPE) ++ type = TREE_TYPE (type); ++ return type; ++} ++ ++/* Return true if TYPE is a type which struct reorg should handled. 
*/ ++ ++static bool ++handled_type (tree type) ++{ ++ type = inner_type (type); ++ if (TREE_CODE (type) == RECORD_TYPE) ++ return !is_va_list_type (type); ++ return false; ++} ++ ++/* The gimplify_buildN API is moved to tree-vect-generic.c locally ++ at commit b972e036f40c12b106f9070c3e8adea0eb8a45fa. ++ ++ The gimplify_buildN API is copied from gcc 10 implementation. ++*/ ++ ++/* Build a binary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. */ ++ ++static tree ++gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code, ++ tree type, tree a, tree b) ++{ ++ tree ret; ++ ++ ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++/* Build a unary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. */ ++ ++static tree ++gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, ++ tree a) ++{ ++ tree ret; ++ ++ ret = fold_build1_loc (gimple_location (gsi_stmt (*gsi)), code, type, a); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++} // anon namespace ++ ++ ++namespace struct_reorg { ++ ++/* Constructor of srfunction. */ ++ ++srfunction::srfunction (cgraph_node *n) ++ : node (n), ++ old (NULL), ++ newnode (NULL), ++ newf (NULL) ++{} ++ ++/* Add an ARG to the list of arguments for the function. */ ++ ++void ++srfunction::add_arg (srdecl *arg) ++{ ++ args.safe_push (arg); ++} ++ ++/* Dump the SRFUNCTION to the file FILE. 
*/ ++ ++void ++srfunction::dump (FILE *file) ++{ ++ if (node) ++ { ++ fprintf (file, "function : "); ++ print_generic_expr (file, node->decl); ++ fprintf (file, " with arguments: "); ++ for (unsigned i = 0; i < args.length (); i++) ++ { ++ if (i == 0) ++ fprintf (file, "\n "); ++ else ++ fprintf (file, "\n, "); ++ argsi->dump (file); ++ } ++ ++ fprintf (file, "\nuses globals: "); ++ for (unsigned i = 0; i < globals.length (); i++) ++ { ++ fprintf (file, "\n "); ++ globalsi->dump (file); ++ } ++ ++ fprintf (file, "\ndecls: "); ++ } ++ else ++ fprintf (file, "globals : "); ++ ++ for (unsigned i = 0; i < decls.length (); i++) ++ { ++ fprintf (file, "\n "); ++ declsi->dump (file); ++ } ++} ++ ++/* Simple dump the SRFUNCTION to the file FILE; ++ used so it is not recusive. */ ++ ++void ++srfunction::simple_dump (FILE *file) ++{ ++ print_generic_expr (file, node->decl); ++} ++ ++/* Constructor of FIELD. */ ++ ++srfield::srfield (tree field, srtype *base) ++ : offset (int_byte_position (field)), ++ fieldtype (TREE_TYPE (field)), ++ fielddecl (field), ++ base (base), ++ type (NULL), ++ clusternum (0) ++{ ++ for (int i = 0; i < max_split; i++) ++ newfieldi = NULL_TREE; ++} ++ ++/* Constructor of TYPE. */ ++ ++srtype::srtype (tree type) ++ : type (type), ++ chain_type (false), ++ escapes (does_not_escape), ++ visited (false) ++{ ++ for (int i = 0; i < max_split; i++) ++ newtypei = NULL_TREE; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ if (DECL_BIT_FIELD (field)) ++ { ++ escapes = escape_bitfields; ++ continue; ++ } ++ else if (!DECL_SIZE (field) ++ || TREE_CODE (DECL_SIZE (field)) != INTEGER_CST) ++ { ++ escapes = escape_variable_sized_array; ++ break; ++ } ++ srfield *t = new srfield (field, this); ++ fields.safe_push (t); ++ } ++ } ++} ++ ++/* Mark the type as escaping type E at statement STMT. 
*/ ++ ++void ++srtype::mark_escape (escape_type e, gimple *stmt) ++{ ++ /* Once the type has escaped, it should never ++ change back to non escaping. */ ++ gcc_assert (e != does_not_escape); ++ if (has_escaped ()) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nO type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " has already escaped."); ++ fprintf (dump_file, " old = \"%s\" ", ++ escape_type_stringescapes - 1); ++ fprintf (dump_file, " new = \"%s\"\n", escape_type_stringe - 1); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++ } ++ escapes = e; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nN type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " new = \"%s\"\n", escape_reason ()); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Add FIELD to the list of fields that use this type. */ ++ ++void ++srtype::add_field_site (srfield *field) ++{ ++ field_sites.safe_push (field); ++} ++ ++/* Constructor of DECL. */ ++ ++srdecl::srdecl (srtype *tp, tree decl, int argnum) ++ : type (tp), ++ decl (decl), ++ func (NULL_TREE), ++ argumentnum (argnum), ++ visited (false) ++{ ++ if (TREE_CODE (decl) == SSA_NAME) ++ func = current_function_decl; ++ else if (!is_global_var (decl)) ++ func = DECL_CONTEXT (decl); ++ for (int i = 0; i < max_split; i++) ++ newdecli = NULL_TREE; ++} ++ ++/* Find DECL in the function. */ ++ ++srdecl * ++srfunction::find_decl (tree decl) ++{ ++ for (unsigned i = 0; i < decls.length (); i++) ++ if (declsi->decl == decl) ++ return declsi; ++ return NULL; ++} ++ ++/* Record DECL of the TYPE with argument num ARG. */ ++ ++srdecl * ++srfunction::record_decl (srtype *type, tree decl, int arg) ++{ ++ // Search for the decl to see if it is already there. 
++ srdecl *decl1 = find_decl (decl); ++ ++ if (decl1) ++ return decl1; ++ ++ gcc_assert (type); ++ ++ decl1 = new srdecl (type, decl, arg); ++ decls.safe_push (decl1); ++ return decl1; ++} ++ ++/* Find the field at OFF offset. */ ++ ++srfield * ++srtype::find_field (unsigned HOST_WIDE_INT off) ++{ ++ unsigned int i; ++ srfield *field; ++ ++ /* FIXME: handle array/struct field inside the current struct. */ ++ /* NOTE This does not need to be fixed to handle libquatumn. */ ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (off == field->offset) ++ return field; ++ } ++ return NULL; ++} ++ ++/* Add the function FN to the list of functions if it ++ is there not already. */ ++ ++void ++srtype::add_function (srfunction *fn) ++{ ++ unsigned decluid; ++ unsigned i; ++ decluid = DECL_UID (fn->node->decl); ++ ++ srfunction *fn1; ++ // Search for the decl to see if it is already there. ++ FOR_EACH_VEC_ELT (functions, i, fn1) ++ { ++ if (DECL_UID (fn1->node->decl) == decluid) ++ return; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new function: %u.\n", decluid); ++ ++ functions.safe_push (fn); ++} ++ ++/* Dump out the type structure to FILE. 
*/ ++ ++void ++srtype::dump (FILE *f) ++{ ++ unsigned int i; ++ srfield *field; ++ srfunction *fn; ++ sraccess *access; ++ ++ if (chain_type) ++ fprintf (f, "chain decl "); ++ ++ fprintf (f, "type : "); ++ print_generic_expr (f, type); ++ fprintf (f, "(%d) { ", TYPE_UID (type)); ++ if (escapes != does_not_escape) ++ fprintf (f, " escapes = \"%s\"\n", escape_reason ()); ++ fprintf (f, " fields = { "); ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (i == 0) ++ fprintf (f, "\n "); ++ else ++ fprintf (f, "\n, "); ++ field->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n accesses = {"); ++ FOR_EACH_VEC_ELT (accesses, i, access) ++ { ++ fprintf (f, "\n"); ++ access->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n functions = {"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fprintf (f, " \n"); ++ fn->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "\n field_sites = {"); ++ FOR_EACH_VEC_ELT (field_sites, i, field) ++ { ++ fprintf (f, " \n"); ++ field->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); ++} ++ ++/* A simplified dump out the type structure to FILE. */ ++ ++void ++srtype::simple_dump (FILE *f) ++{ ++ print_generic_expr (f, type); ++} ++ ++/* Analyze the type and decide what to be done with it. */ ++ ++void ++srtype::analyze (void) ++{ ++ /* Chain decl types can't be split ++ so don't try. */ ++ if (chain_type) ++ return; ++ ++ /* If there is only one field then there is nothing ++ to be done. */ ++ if (fields.length () == 1) ++ return; ++ ++ /* For now we unconditionally split only structures with 2 fields ++ into 2 different structures. In future we intend to add profile ++ info and/or static heuristics to differentiate splitting process. */ ++ if (fields.length () == 2) ++ fields1->clusternum = 1; ++ ++ /* Otherwise we do nothing. */ ++ if (fields.length () >= 3) ++ return; ++} ++ ++/* Create the new fields for this field. 
*/ ++ ++void ++srfield::create_new_fields (tree newtypemax_split, ++ tree newfieldsmax_split, ++ tree newlastmax_split) ++{ ++ tree ntmax_split; ++ ++ for (unsigned i = 0; i < max_split; i++) ++ nti = NULL; ++ ++ if (type == NULL) ++ nt0 = fieldtype; ++ else ++ memcpy (nt, type->newtype, sizeof (type->newtype)); ++ ++ for (unsigned i = 0; i < max_split && nti != NULL; i++) ++ { ++ tree field = make_node (FIELD_DECL); ++ if (nt1 != NULL && DECL_NAME (fielddecl)) ++ { ++ const char *tname = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); ++ char id10; ++ char *name; ++ ++ sprintf (id, "%d", i); ++ name = concat (tname, ".reorg.", id, NULL); ++ DECL_NAME (field) = get_identifier (name); ++ free (name); ++ } ++ else ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ ++ TREE_TYPE (field) = reconstruct_complex_type ( ++ TREE_TYPE (fielddecl), nti); ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtypeclusternum; ++ ++ if (newfieldsclusternum == NULL) ++ newfieldsclusternum = newlastclusternum = field; ++ else ++ { ++ DECL_CHAIN (newlastclusternum) = field; ++ newlastclusternum = field; ++ } ++ newfieldi = field; ++ } ++} ++ ++/* Create the new TYPE corresponding to THIS type. */ ++ ++bool ++srtype::create_new_type (void) ++{ ++ /* If the type has been visited, ++ then return if a new type was ++ created or not. */ ++ if (visited) ++ return has_new_type (); ++ ++ visited = true; ++ ++ if (escapes != does_not_escape) ++ { ++ newtype0 = type; ++ return false; ++ } ++ ++ bool createnewtype = false; ++ unsigned maxclusters = 0; ++ ++ /* Create a new type for each field. 
*/ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *field = fieldsi; ++ if (field->type) ++ createnewtype |= field->type->create_new_type (); ++ if (field->clusternum > maxclusters) ++ maxclusters = field->clusternum; ++ } ++ ++ /* If the fields' types did have a change or ++ we are not splitting the struct into two clusters, ++ then just return false and don't change the type. */ ++ if (!createnewtype && maxclusters == 0) ++ { ++ newtype0 = type; ++ return false; ++ } ++ ++ /* Should have at most max_split clusters. */ ++ gcc_assert (maxclusters < max_split); ++ ++ tree newfieldsmax_split; ++ tree newlastmax_split; ++ ++ maxclusters++; ++ ++ const char *tname = NULL; ++ ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ newfieldsi = NULL_TREE; ++ newlasti = NULL_TREE; ++ newtypei = make_node (RECORD_TYPE); ++ ++ char *name = NULL; ++ char id10; ++ sprintf (id, "%d", i); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ TYPE_NAME (newtypei) = get_identifier (name); ++ free (name); ++ } ++ } ++ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *f = fieldsi; ++ f->create_new_fields (newtype, newfields, newlast); ++ } ++ ++ /* No reason to warn about these structs since the warning would ++ have happened already. 
*/ ++ int save_warn_padded = warn_padded; ++ warn_padded = 0; ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ TYPE_FIELDS (newtypei) = newfieldsi; ++ layout_type (newtypei); ++ } ++ ++ warn_padded = save_warn_padded; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created %d types:\n", maxclusters); ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ print_generic_expr (dump_file, newtypei); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ return true; ++} ++ ++/* Helper function to copy some attributes from ORIG_DECL to the NEW_DECL. */ ++ ++static inline void ++copy_var_attributes (tree new_decl, tree orig_decl) ++{ ++ DECL_ARTIFICIAL (new_decl) = 1; ++ DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_decl); ++ TREE_STATIC (new_decl) = TREE_STATIC (orig_decl); ++ TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_decl); ++ TREE_USED (new_decl) = TREE_USED (orig_decl); ++ DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_decl); ++ TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (orig_decl); ++ TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (orig_decl); ++ TREE_READONLY (new_decl) = TREE_READONLY (orig_decl); ++ if (is_global_var (orig_decl)) ++ set_decl_tls_model (new_decl, DECL_TLS_MODEL (orig_decl)); ++} ++ ++/* Create all of the new decls (SSA_NAMES included) for THIS function. */ ++ ++void ++srfunction::create_new_decls (void) ++{ ++ /* If this function has been cloned, we don't need to ++ create the new decls. */ ++ if (newnode) ++ return; ++ ++ if (node) ++ set_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ ++ for (unsigned i = 0; i < decls.length (); i++) ++ { ++ srdecl *decl = declsi; ++ srtype *type = decl->type; ++ /* If the type of the decl does not change, ++ then don't create a new decl. */ ++ if (!type->has_new_type ()) ++ { ++ decl->newdecl0 = decl->decl; ++ continue; ++ } ++ ++ /* Handle SSA_NAMEs. 
*/ ++ if (TREE_CODE (decl->decl) == SSA_NAME) ++ { ++ tree newtype1max_split; ++ tree inner = SSA_NAME_VAR (decl->decl); ++ tree newinnermax_split; ++ memset (newinner, 0, sizeof (newinner)); ++ for (unsigned j = 0; j < max_split && type->newtypej; j++) ++ newtype1j = reconstruct_complex_type (TREE_TYPE (declsi->decl), ++ type->newtypej); ++ if (inner) ++ { ++ srdecl *in = find_decl (inner); ++ gcc_assert (in); ++ memcpy (newinner, in->newdecl, sizeof (newinner)); ++ } ++ tree od = declsi->decl; ++ /* Create the new ssa names and copy some attributes ++ from the old one. */ ++ for (unsigned j = 0; j < max_split && type->newtypej; j++) ++ { ++ tree nd = make_ssa_name (newinnerj ? newinnerj ++ : newtype1j); ++ decl->newdeclj = nd; ++ /* If the old decl was a default definition, ++ handle it specially. */ ++ if (SSA_NAME_IS_DEFAULT_DEF (od)) ++ { ++ SSA_NAME_IS_DEFAULT_DEF (nd) = true; ++ SSA_NAME_DEF_STMT (nd) = gimple_build_nop (); ++ ++ /* Set the default definition for the ssaname if needed. 
*/ ++ if (inner) ++ { ++ gcc_assert (newinnerj); ++ set_ssa_default_def (cfun, newinnerj, nd); ++ } ++ } ++ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nd) ++ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (od); ++ statistics_counter_event (cfun, "Create new ssa_name", 1); ++ } ++ } ++ else if (TREE_CODE (declsi->decl) == VAR_DECL) ++ { ++ tree orig_var = decl->decl; ++ const char *tname = NULL; ++ if (DECL_NAME (orig_var)) ++ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); ++ for (unsigned j = 0; j < max_split && type->newtypej; j++) ++ { ++ tree new_name = NULL; ++ char *name = NULL; ++ char id10; ++ sprintf (id, "%d", j); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ new_name = get_identifier (name); ++ free (name); ++ } ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), ++ type->newtypej); ++ decl->newdeclj = build_decl (DECL_SOURCE_LOCATION (orig_var), ++ VAR_DECL, new_name, newtype1); ++ copy_var_attributes (decl->newdeclj, orig_var); ++ if (!is_global_var (orig_var)) ++ add_local_decl (cfun, decl->newdeclj); ++ else ++ varpool_node::add (decl->newdeclj); ++ statistics_counter_event (cfun, "Create new var decl", 1); ++ } ++ } ++ /* Paramater decls are already handled in create_new_functions. */ ++ else if (TREE_CODE (declsi->decl) == PARM_DECL) ++ ; ++ else ++ internal_error ("Unhandled declaration type stored"); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created New decls for decl:\n"); ++ fprintf (dump_file, "\n"); ++ declsi->dump (dump_file); ++ fprintf (dump_file, "\n"); ++ for (unsigned j = 0; j < max_split && declsi->newdeclj; j++) ++ { ++ print_generic_expr (dump_file, declsi->newdeclj); ++ fprintf (dump_file, "\n"); ++ } ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ set_cfun (NULL); ++} ++ ++/* Dump out the field structure to FILE. 
*/ ++ ++void ++srfield::dump (FILE *f) ++{ ++ fprintf (f, "field (%d) { ", DECL_UID (fielddecl)); ++ fprintf (f, "base = "); ++ base->simple_dump (f); ++ fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); ++ fprintf (f, ", type = "); ++ print_generic_expr (f, fieldtype); ++ if (type) ++ { ++ fprintf (f, "( srtype = "); ++ type->simple_dump (f); ++ fprintf (f, ")"); ++ } ++ fprintf (f, "\n}\n"); ++} ++ ++/* A simplified dump out the field structure to FILE. */ ++ ++void ++srfield::simple_dump (FILE *f) ++{ ++ fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++} ++ ++/* Dump out the access structure to FILE. */ ++ ++void ++sraccess::dump (FILE *f) ++{ ++ fprintf (f, "access { "); ++ fprintf (f, "type = '("); ++ type->simple_dump (f); ++ fprintf (f, ")'"); ++ if (field) ++ { ++ fprintf (f, ", field = '("); ++ field->simple_dump (f); ++ fprintf (f, ")'"); ++ } ++ else ++ fprintf (f, ", whole type"); ++ fprintf (f, " in function: %s/%d", node->name (), node->order); ++ fprintf (f, ", stmt:\n"); ++ print_gimple_stmt (f, stmt, 0); ++ fprintf (f, "\n }\n"); ++} ++ ++/* Dump out the decl structure to FILE. 
*/ ++ ++void ++srdecl::dump (FILE *file) ++{ ++ if (!func) ++ fprintf (file, "global "); ++ if (argumentnum != -1) ++ fprintf (file, "argument(%d) ", argumentnum); ++ fprintf (file, "decl: "); ++ print_generic_expr (file, decl); ++ fprintf (file, " type: "); ++ type->simple_dump (file); ++} ++ ++} // namespace struct_reorg ++ ++ ++namespace { ++ ++struct ipa_struct_reorg ++{ ++public: ++ // Constructors ++ ipa_struct_reorg (void) ++ : current_function (NULL), ++ done_recording (false) ++ {} ++ ++ // Public methods ++ unsigned execute (void); ++ void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); ++private: ++ // Fields ++ auto_vec_del<srtype> types; ++ auto_vec_del<srfunction> functions; ++ srglobal globals; ++ srfunction *current_function; ++ ++ bool done_recording; ++ ++ // Private methods ++ void dump_types (FILE *f); ++ void dump_types_escaped (FILE *f); ++ void dump_functions (FILE *f); ++ void record_accesses (void); ++ void detect_cycles (void); ++ bool walk_field_for_cycles (srtype *); ++ void prune_escaped_types (void); ++ void propagate_escape (void); ++ void analyze_types (void); ++ void clear_visited (void); ++ bool create_new_types (void); ++ void restore_field_type (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++ void create_new_functions (void); ++ void create_new_args (cgraph_node *new_node); ++ unsigned rewrite_functions (void); ++ srdecl *record_var (tree decl, ++ escape_type escapes = does_not_escape, ++ int arg = -1); ++ srfunction *record_function (cgraph_node *node); ++ srfunction *find_function (cgraph_node *node); ++ srtype *record_type (tree type); ++ void process_union (tree type); ++ srtype *find_type (tree type); ++ void maybe_record_stmt (cgraph_node *, gimple *); ++ void maybe_record_assign (cgraph_node *, gassign *); ++ void maybe_record_call (cgraph_node *, gcall *); ++ void maybe_record_allocation_site (cgraph_node *, gimple *); ++ void record_stmt_expr (tree expr, cgraph_node *node, 
gimple *stmt); ++ void mark_expr_escape (tree, escape_type, gimple *stmt); ++ tree allocate_size (srtype *t, gimple *stmt); ++ ++ void mark_decls_in_as_not_needed (tree fn); ++ ++ bool rewrite_stmt (gimple *, gimple_stmt_iterator *); ++ bool rewrite_assign (gassign *, gimple_stmt_iterator *); ++ bool rewrite_call (gcall *, gimple_stmt_iterator *); ++ bool rewrite_cond (gcond *, gimple_stmt_iterator *); ++ bool rewrite_debug (gimple *, gimple_stmt_iterator *); ++ bool rewrite_phi (gphi *); ++ bool rewrite_expr (tree expr, ++ tree newexprmax_split, ++ bool ignore_missing_decl = false); ++ bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhsmax_split, ++ tree newrhsmax_split); ++ bool get_type_field (tree expr, tree &base, bool &indirect, ++ srtype *&type, srfield *&field, ++ bool &realpart, bool &imagpart, ++ bool &address, bool should_create = false, ++ bool can_escape = false); ++ bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); ++ ++ void check_definition (srdecl *decl, vec<srdecl *> &); ++ void check_uses (srdecl *decl, vec<srdecl *> &); ++ void check_use (srdecl *decl, gimple *stmt, vec<srdecl *> &); ++ void check_type_and_push (tree newdecl, srtype *type, ++ vec<srdecl *> &worklist, gimple *stmt); ++ void check_other_side (srdecl *decl, tree other, gimple *stmt, ++ vec<srdecl *> &worklist); ++ ++ void find_vars (gimple *stmt); ++ void find_var (tree expr, gimple *stmt); ++ void mark_types_asm (gasm *astmt); ++ ++ bool has_rewritten_type (srfunction *); ++ void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++}; ++ ++/* Dump all of the recorded types to file F. */ ++ ++void ++ipa_struct_reorg::dump_types (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ type->dump (f); ++ } ++ fprintf (f, "\n"); ++} ++ ++/* Dump all of the recorded types to file F. 
*/ ++ ++void ++ipa_struct_reorg::dump_types_escaped (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ { ++ type->simple_dump (f); ++ fprintf (f, " has escaped: \"%s\"\n", type->escape_reason ()); ++ } ++ } ++ fprintf (f, "\n"); ++} ++ ++/* Dump all of the record functions to file F. */ ++ ++void ++ipa_struct_reorg::dump_functions (FILE *f) ++{ ++ unsigned i; ++ srfunction *fn; ++ ++ fprintf (f, "\n\n"); ++ globals.dump (f); ++ fprintf (f, "\n\n"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fn->dump (f); ++ fprintf (f, "\n"); ++ } ++ fprintf (f, "\n\n"); ++} ++ ++/* Find the recorded srtype corresponding to TYPE. */ ++ ++srtype * ++ipa_struct_reorg::find_type (tree type) ++{ ++ unsigned i; ++ /* Get the main variant as we are going ++ to find that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ srtype *type1; ++ // Search for the type to see if it is already there. ++ FOR_EACH_VEC_ELT (types, i, type1) ++ { ++ if (types_compatible_p (type1->type, type)) ++ return type1; ++ } ++ return NULL; ++} ++ ++/* Is TYPE a volatile type or one which points ++ to a volatile type. */ ++ ++static bool ++isvolatile_type (tree type) ++{ ++ if (TYPE_VOLATILE (type)) ++ return true; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ type = TREE_TYPE (type); ++ if (TYPE_VOLATILE (type)) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE an array type or points to an array type. */ ++ ++static bool ++isarraytype (tree type) ++{ ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ while (POINTER_TYPE_P (type)) ++ { ++ type = TREE_TYPE (type); ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE a pointer to another pointer. 
*/ ++ ++static bool ++isptrptr (tree type) ++{ ++ bool firstptr = false; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ if (POINTER_TYPE_P (type)) ++ { ++ if (firstptr) ++ return true; ++ firstptr = true; ++ } ++ type = TREE_TYPE (type); ++ } ++ return false; ++} ++ ++/* Return the escape type which corresponds to if ++ this is an volatile type, an array type or a pointer ++ to a pointer type. */ ++ ++static escape_type ++escape_type_volatile_array_or_ptrptr (tree type) ++{ ++ if (isvolatile_type (type)) ++ return escape_volatile; ++ if (isarraytype (type)) ++ return escape_array; ++ if (isptrptr (type)) ++ return escape_ptr_ptr; ++ return does_not_escape; ++} ++ ++/* Record TYPE if not already recorded. */ ++ ++srtype * ++ipa_struct_reorg::record_type (tree type) ++{ ++ unsigned typeuid; ++ ++ /* Get the main variant as we are going ++ to record that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ typeuid = TYPE_UID (type); ++ ++ srtype *type1; ++ ++ type1 = find_type (type); ++ if (type1) ++ return type1; ++ ++ /* If already done recording just return NULL. */ ++ if (done_recording) ++ return NULL; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new type: %u.\n", typeuid); ++ ++ type1 = new srtype (type); ++ types.safe_push (type1); ++ ++ /* If the type has an user alignment set, ++ that means the user most likely already setup the type. 
*/ ++ if (TYPE_USER_ALIGN (type)) ++ type1->mark_escape (escape_user_alignment, NULL); ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree t = TREE_TYPE (field); ++ process_union (t); ++ if (TREE_CODE (inner_type (t)) == UNION_TYPE ++ || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) ++ type1->mark_escape (escape_union, NULL); ++ if (isvolatile_type (t)) ++ type1->mark_escape (escape_volatile, NULL); ++ escape_type e = escape_type_volatile_array_or_ptrptr (t); ++ if (e != does_not_escape) ++ type1->mark_escape (e, NULL); ++ if (handled_type (t)) ++ { ++ srtype *t1 = record_type (inner_type (t)); ++ srfield *f = type1->find_field (int_byte_position (field)); ++ /* We might have an variable sized type which ++ we don't set the handle. */ ++ if (f) ++ { ++ f->type = t1; ++ t1->add_field_site (f); ++ } ++ if (t1 == type1) ++ type1->mark_escape (escape_rescusive_type, NULL); ++ } ++ } ++ } ++ ++ return type1; ++} ++ ++/* Mark TYPE as escaping with ESCAPES as the reason. */ ++ ++void ++ipa_struct_reorg::mark_type_as_escape (tree type, ++ escape_type escapes, ++ gimple *stmt) ++{ ++ if (handled_type (type)) ++ { ++ srtype *stype = record_type (inner_type (type)); ++ ++ if (!stype) ++ return; ++ ++ stype->mark_escape (escapes, stmt); ++ } ++} ++ ++/* Maybe process the union of type TYPE, such that marking all of the fields' ++ types as being escaping. */ ++ ++void ++ipa_struct_reorg::process_union (tree type) ++{ ++ static hash_set<tree> unions_recorded; ++ ++ type = inner_type (type); ++ if (TREE_CODE (type) != UNION_TYPE ++ && TREE_CODE (type) != QUAL_UNION_TYPE) ++ return; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ /* We already processed this type. 
*/ ++ if (unions_recorded.add (type)) ++ return; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ mark_type_as_escape (TREE_TYPE (field), escape_union); ++ process_union (TREE_TYPE (field)); ++ } ++ } ++} ++ ++/* Used by record_var function as a callback to walk_tree. ++ Mark the type as escaping if it has expressions which ++ cannot be converted for global initializations. */ ++ ++static tree ++record_init_types (tree *tp, int *walk_subtrees, void *data) ++{ ++ ipa_struct_reorg *c = (ipa_struct_reorg *)data; ++ switch (TREE_CODE (*tp)) ++ { ++ CASE_CONVERT: ++ case COMPONENT_REF: ++ case VIEW_CONVERT_EXPR: ++ case ARRAY_REF: ++ { ++ tree typeouter = TREE_TYPE (*tp); ++ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); ++ c->mark_type_as_escape (typeouter, escape_via_global_init); ++ c->mark_type_as_escape (typeinner, escape_via_global_init); ++ break; ++ } ++ case INTEGER_CST: ++ if (!integer_zerop (*tp)) ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ break; ++ case VAR_DECL: ++ case PARM_DECL: ++ case FIELD_DECL: ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ *walk_subtrees = false; ++ break; ++ default: ++ *walk_subtrees = true; ++ break; ++ } ++ return NULL_TREE; ++} ++ ++/* Record var DECL; optionally specify the escape reason and the argument ++ number in a function. 
*/ ++ ++srdecl * ++ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) ++{ ++ srtype *type; ++ srdecl *sd = NULL; ++ ++ process_union (TREE_TYPE (decl)); ++ ++ if (handled_type (TREE_TYPE (decl))) ++ { ++ type = record_type (inner_type (TREE_TYPE (decl))); ++ escape_type e; ++ ++ if (done_recording && !type) ++ return NULL; ++ ++ gcc_assert (type); ++ if (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) ++ sd = globals.record_decl (type, decl, arg); ++ else ++ { ++ gcc_assert (current_function); ++ sd = current_function->record_decl (type, decl, arg); ++ } ++ ++ /* If the variable has the "used" attribute, ++ then treat the type as escaping. */ ++ if (escapes != does_not_escape) ++ e = escapes; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_PRESERVE_P (decl)) ++ e = escape_marked_as_used; ++ else if (TREE_THIS_VOLATILE (decl)) ++ e = escape_volatile; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_USER_ALIGN (decl)) ++ e = escape_user_alignment; ++ else if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) ++ && TREE_PUBLIC (decl)) ++ e = escape_via_global_var; ++ /* We don't have an initlizer. */ ++ else if (TREE_CODE (decl) != SSA_NAME ++ && DECL_INITIAL (decl) == error_mark_node) ++ e = escape_via_global_var; ++ else ++ e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); ++ ++ if (e != does_not_escape) ++ type->mark_escape (e, NULL); ++ } ++ ++ /* Record the initial usage of variables as types escapes. */ ++ if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) ++ && DECL_INITIAL (decl)) ++ { ++ walk_tree_without_duplicates (&DECL_INITIAL (decl), ++ record_init_types, this); ++ if (!integer_zerop (DECL_INITIAL (decl)) ++ && DECL_INITIAL (decl) != error_mark_node) ++ mark_type_as_escape (TREE_TYPE (decl), escape_via_global_init); ++ } ++ return sd; ++} ++ ++/* Find void* ssa_names which are used inside MEM or if we have &a.c, ++ mark the type as escaping. 
*/ ++ ++void ++ipa_struct_reorg::find_var (tree expr, gimple *stmt) ++{ ++ /* If we have VCE<a> mark the outer type as escaping and the inner one ++ Also mark the inner most operand. */ ++ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (expr), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (expr, 0)), ++ escape_vce, stmt); ++ } ++ ++ /* If we have &b.c then we need to mark the type of b ++ as escaping as tracking a will be hard. */ ++ if (TREE_CODE (expr) == ADDR_EXPR ++ || TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ tree r = TREE_OPERAND (expr, 0); ++ if (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ while (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ if (TREE_CODE (r) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (r), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 0)), ++ escape_vce, stmt); ++ } ++ if (TREE_CODE (r) == MEM_REF) ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), ++ escape_addr, stmt); ++ r = TREE_OPERAND (r, 0); ++ } ++ mark_expr_escape (r, escape_addr, stmt); ++ } ++ } ++ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address, true, true); ++} ++ ++void ++ipa_struct_reorg::find_vars (gimple *stmt) ++{ ++ gasm *astmt; ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ if (TREE_CODE (lhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (lhs)) ++ && handled_type (TREE_TYPE (rhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (rhs))); ++ srdecl *d = find_decl (lhs); ++ if (!d 
&& t) ++ current_function->record_decl (t, lhs, -1); ++ } ++ if (TREE_CODE (rhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (rhs)) ++ && handled_type (TREE_TYPE (lhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (lhs))); ++ srdecl *d = find_decl (rhs); ++ if (!d && t) ++ current_function->record_decl (t, rhs, -1); ++ } ++ } ++ break; ++ ++ case GIMPLE_CALL: ++ if (gimple_call_lhs (stmt)) ++ find_var (gimple_call_lhs (stmt), stmt); ++ ++ if (gimple_call_chain (stmt)) ++ find_var (gimple_call_chain (stmt), stmt); ++ ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ find_var (gimple_call_arg (stmt, i), stmt); ++ break; ++ ++ case GIMPLE_ASM: ++ astmt = as_a <gasm *> (stmt); ++ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_input_op (astmt, i)), stmt); ++ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_output_op (astmt, i)), stmt); ++ mark_types_asm (astmt); ++ break; ++ ++ case GIMPLE_RETURN: ++ { ++ tree expr = gimple_return_retval (as_a <greturn *> (stmt)); ++ if (expr) ++ find_var (expr, stmt); ++ /* return &a; should mark the type of a as escaping ++ through a return. */ ++ if (expr && TREE_CODE (expr) == ADDR_EXPR) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ srdecl *d = find_decl (expr); ++ if (d) ++ d->type->mark_escape (escape_return, stmt); ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Maybe record access of statement for further analaysis. 
*/ ++ ++void ++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ maybe_record_assign (node, as_a <gassign *> (stmt)); ++ break; ++ case GIMPLE_CALL: ++ maybe_record_call (node, as_a <gcall *> (stmt)); ++ break; ++ case GIMPLE_DEBUG: ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ default: ++ break; ++ } ++} ++ ++/* This function checks whether ARG is a result of multiplication ++ of some number by STRUCT_SIZE. If yes, the function returns true ++ and this number is filled into NUM. */ ++ ++static bool ++is_result_of_mult (tree arg, tree *num, tree struct_size) ++{ ++ if (!struct_size ++ || TREE_CODE (struct_size) != INTEGER_CST ++ || integer_zerop (struct_size)) ++ return false; ++ ++ /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ ++ if (TREE_CODE (arg) == INTEGER_CST) ++ { ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ { ++ *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ return true; ++ } ++ return false; ++ } ++ gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ ++ /* If the allocation statement was of the form ++ D.2229_10 = <alloc_func> (D.2228_9); ++ then size_def_stmt can be D.2228_9 = num.3_8 * 8; */ ++ ++ while (size_def_stmt && is_gimple_assign (size_def_stmt)) ++ { ++ tree lhs = gimple_assign_lhs (size_def_stmt); ++ ++ /* We expect temporary here. */ ++ if (!is_gimple_reg (lhs)) ++ return false; ++ ++ // FIXME: this should handle SHIFT also. 
++ if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) ++ { ++ tree num1, num2; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size)) ++ return false; ++ if (!is_result_of_mult (arg1, &num2, struct_size)) ++ return false; ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++ } ++ else if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) ++ { ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ ++ *num = NULL_TREE; ++ return false; ++ } ++ else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ { ++ arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ } ++ else ++ { ++ *num = NULL_TREE; ++ return false; ++ } ++ } ++ ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Return TRUE if STMT is an allocation statement that is handled. */ ++ ++static bool ++handled_allocation_stmt (gimple *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ return true; ++ return false; ++} ++ ++/* Returns the allocated size / T size for STMT. That is the number of ++ elements in the array allocated. 
*/ ++ ++tree ++ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++{ ++ if (!stmt ++ || gimple_code (stmt) != GIMPLE_CALL ++ || !handled_allocation_stmt (stmt)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNot a allocate statment:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ if (type->has_escaped ()) ++ return NULL; ++ ++ tree struct_size = TYPE_SIZE_UNIT (type->type); ++ ++ tree size = gimple_call_arg (stmt, 0); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC)) ++ size = gimple_call_arg (stmt, 1); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ { ++ tree arg1; ++ arg1 = gimple_call_arg (stmt, 1); ++ /* Check that second argument is a constant equal to ++ the size of structure. */ ++ if (operand_equal_p (arg1, struct_size, 0)) ++ return size; ++ /* Check that first argument is a constant equal to ++ the size of structure. 
*/ ++ if (operand_equal_p (size, struct_size, 0)) ++ return arg1; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncalloc the correct size:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ tree num; ++ if (!is_result_of_mult (size, &num, struct_size)) ++ return NULL; ++ ++ return num; ++} ++ ++void ++ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, ++ gimple *stmt) ++{ ++ gcc_assert (TREE_CODE (side) == SSA_NAME || TREE_CODE (side) == ADDR_EXPR); ++ srtype *type = NULL; ++ if (handled_type (TREE_TYPE (other))) ++ type = record_type (inner_type (TREE_TYPE (other))); ++ if (TREE_CODE (side) == ADDR_EXPR) ++ side = TREE_OPERAND (side, 0); ++ srdecl *d = find_decl (side); ++ if (!type) ++ { ++ if (!d) ++ return; ++ if (TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side))) ++ return; ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ if (!d) ++ { ++ if (VOID_POINTER_P (TREE_TYPE (side)) ++ && TREE_CODE (side) == SSA_NAME) ++ current_function->record_decl (type, side, -1); ++ else ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++ else if (type != d->type) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++} ++ ++/* Record accesses in an assignment statement STMT. */ ++ ++void ++ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) ++{ ++ if (gimple_clobber_p (stmt)) ++ { ++ record_stmt_expr (gimple_assign_lhs (stmt), node, stmt); ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num; ++ if (!handled_type (TREE_TYPE (lhs))) ++ return; ++ /* Check if rhs2 is a multiplication of the size of the type. 
*/ ++ if (is_result_of_mult (rhs2, &num, ++ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ { ++ record_stmt_expr (lhs, node, stmt); ++ record_stmt_expr (rhs1, node, stmt); ++ } ++ else ++ { ++ mark_expr_escape (lhs, escape_non_multiply_size, stmt); ++ mark_expr_escape (rhs1, escape_non_multiply_size, stmt); ++ } ++ return; ++ } ++ /* Copies, References, Taking addresses. */ ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ /* If we have a = &b.c then we need to mark the type of b ++ as escaping as tracking a will be hard. */ ++ if (TREE_CODE (rhs) == ADDR_EXPR) ++ { ++ tree r = TREE_OPERAND (rhs, 0); ++ if (handled_component_p (r)) ++ { ++ while (handled_component_p (r)) ++ r = TREE_OPERAND (r, 0); ++ mark_expr_escape (r, escape_addr, stmt); ++ return; ++ } ++ } ++ if ((TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == ADDR_EXPR)) ++ maybe_mark_or_record_other_side (rhs, lhs, stmt); ++ if (TREE_CODE (lhs) == SSA_NAME) ++ maybe_mark_or_record_other_side (lhs, rhs, stmt); ++ } ++} ++ ++static tree ++get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, ++ bool &realpart, bool &imagpart, ++ tree &accesstype) ++{ ++ offset = 0; ++ realpart = false; ++ imagpart = false; ++ accesstype = NULL_TREE; ++ if (TREE_CODE (e) == REALPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ realpart = true; ++ } ++ if (TREE_CODE (e) == IMAGPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ imagpart = true; ++ } ++ tree expr = e; ++ while (true) ++ { ++ switch (TREE_CODE (expr)) ++ { ++ case COMPONENT_REF: ++ { ++ tree field = TREE_OPERAND (expr, 1); ++ tree field_off = byte_position (field); ++ if (TREE_CODE (field_off) != INTEGER_CST) ++ return NULL; ++ offset += tree_to_shwi (field_off); ++ expr = TREE_OPERAND (expr, 0); ++ accesstype = NULL; ++ break; ++ } ++ case MEM_REF: ++ { ++ tree field_off = TREE_OPERAND (expr, 1); ++ gcc_assert (TREE_CODE (field_off) == INTEGER_CST); ++ /* So we 
can mark the types as escaping if different. */ ++ accesstype = TREE_TYPE (field_off); ++ offset += tree_to_uhwi (field_off); ++ return TREE_OPERAND (expr, 0); ++ } ++ default: ++ return expr; ++ } ++ } ++} ++ ++/* Return true if EXPR was accessing the whole type T. */ ++ ++bool ++ipa_struct_reorg::wholeaccess (tree expr, tree base, ++ tree accesstype, srtype *t) ++{ ++ if (expr == base) ++ return true; ++ ++ if (TREE_CODE (expr) == ADDR_EXPR && TREE_OPERAND (expr, 0) == base) ++ return true; ++ ++ if (!accesstype) ++ return false; ++ ++ if (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (accesstype))) ++ return false; ++ ++ if (!handled_type (TREE_TYPE (expr))) ++ return false; ++ ++ srtype *other_type = find_type (inner_type (TREE_TYPE (expr))); ++ ++ if (t == other_type) ++ return true; ++ ++ return false; ++} ++ ++bool ++ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, ++ srtype *&type, srfield *&field, ++ bool &realpart, bool &imagpart, ++ bool &address, bool should_create, ++ bool can_escape) ++{ ++ HOST_WIDE_INT offset; ++ tree accesstype; ++ address = false; ++ bool mark_as_bit_field = false; ++ ++ if (TREE_CODE (expr) == BIT_FIELD_REF) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ mark_as_bit_field = true; ++ } ++ ++ base = get_ref_base_and_offset (expr, offset, realpart, imagpart, ++ accesstype); ++ ++ /* Variable access, unkown type. 
*/ ++ if (base == NULL) ++ return false; ++ ++ if (TREE_CODE (base) == ADDR_EXPR) ++ { ++ address = true; ++ base = TREE_OPERAND (base, 0); ++ } ++ ++ if (offset != 0 && accesstype) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Non zero offset (%d) with MEM.\n", (int)offset); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ srdecl *d = find_decl (base); ++ srtype *t; ++ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (!d); ++ if (!accesstype) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t && should_create && handled_type (accesstype)) ++ t = record_type (inner_type (accesstype)); ++ if (!t) ++ return false; ++ } ++ else if (!d && accesstype) ++ { ++ if (!should_create) ++ return false; ++ if (!handled_type (accesstype)) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t) ++ t = record_type (inner_type (accesstype)); ++ if (!t || t->has_escaped ()) ++ return false; ++ /* If base is not void* mark the type as escaping. 
*/ ++ if (!VOID_POINTER_P (TREE_TYPE (base))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ current_function->record_decl (t, base, -1); ++ } ++ else if (!d) ++ return false; ++ else ++ t = d->type; ++ ++ if (t->has_escaped ()) ++ return false; ++ ++ if (mark_as_bit_field) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_bitfields, NULL); ++ return false; ++ } ++ ++ if (wholeaccess (expr, base, accesstype, t)) ++ { ++ field = NULL; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++ } ++ ++ srfield *f = t->find_field (offset); ++ if (!f) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nunkown field\n"); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ if (!types_compatible_p (f->fieldtype, TREE_TYPE (expr))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nfieldtype = "); ++ print_generic_expr (dump_file, f->fieldtype); ++ fprintf (dump_file, "\naccess type = "); ++ print_generic_expr (dump_file, TREE_TYPE (expr)); ++ fprintf (dump_file, "original expr = "); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ field = f; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++} ++ ++/* Mark the type used in EXPR as escaping. 
*/
++
++void
++ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes,
++                                    gimple *stmt)
++{
++  tree base;
++  bool indirect;
++  srtype *type;
++  srfield *field;
++  bool realpart, imagpart, address;
++  if (!get_type_field (expr, base, indirect, type, field,
++                       realpart, imagpart, address))
++    return;
++
++  type->mark_escape (escapes, stmt);
++}
++
++/* Record accesses in a call statement STMT. */
++
++void
++ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
++{
++  tree argtype;
++  tree fndecl;
++  escape_type escapes = does_not_escape;
++  bool free_or_realloc = gimple_call_builtin_p (stmt, BUILT_IN_FREE)
++    || gimple_call_builtin_p (stmt, BUILT_IN_REALLOC);
++
++  /* We check allocation sites in a different location. */
++  if (handled_allocation_stmt (stmt))
++    return;
++
++  /* A few cases here:
++     1) assigned from the lhs
++     2) Used in argument
++     If a function being called is global (or indirect)
++     then we reject the types as being escaping. */
++
++  if (tree chain = gimple_call_chain (stmt))
++    record_stmt_expr (chain, node, stmt);
++
++  /* Assigned from LHS. */
++  if (tree lhs = gimple_call_lhs (stmt))
++    {
++      /* FIXME: handle return types. */
++      mark_type_as_escape (TREE_TYPE (lhs), escape_return);
++    }
++
++  /* If we have an internal call, just record the stmt. */
++  if (gimple_call_internal_p (stmt))
++    {
++      for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++        record_stmt_expr (gimple_call_arg (stmt, i), node, stmt);
++      return;
++    }
++
++  fndecl = gimple_call_fndecl (stmt);
++
++  /* If we have an indrect call, just mark the types as escape. */
++  if (!fndecl)
++    escapes = escape_pointer_function;
++  /* Non local functions cause escape except for calls to free
++     and realloc.
++     FIXME: should support function annotations too. */
++  else if (!free_or_realloc
++           && !cgraph_node::local_info_node (fndecl)->local)
++    escapes = escape_external_function;
++  else if (!free_or_realloc
++           && !cgraph_node::local_info_node (fndecl)->can_change_signature)
++    escapes = escape_cannot_change_signature;
++  /* FIXME: we should be able to handle functions in other partitions. */
++  else if (symtab_node::get (fndecl)->in_other_partition)
++    escapes = escape_external_function;
++
++  if (escapes != does_not_escape)
++    {
++      for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++        mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
++                             escapes);
++      return;
++    }
++
++  argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
++  for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++    {
++      tree arg = gimple_call_arg (stmt, i);
++      if (argtype)
++        {
++          tree argtypet = TREE_VALUE (argtype);
++          if (!free_or_realloc
++              && VOID_POINTER_P (argtypet))
++            mark_type_as_escape (TREE_TYPE (arg), escape_cast_void);
++          else
++            record_stmt_expr (arg, node, stmt);
++        }
++      else
++        mark_type_as_escape (TREE_TYPE (arg), escape_var_arg_function);
++
++      argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE;
++    }
++}
++
++/* Record the access EXPR made by NODE's statement STMT, marking the
++   accessed type as escaping when struct-reorg is disabled for the
++   current function. */
++
++void
++ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt)
++{
++  tree base;
++  bool indirect;
++  srtype *type;
++  srfield *field;
++  bool realpart, imagpart, address;
++  if (!get_type_field (expr, base, indirect, type, field,
++                       realpart, imagpart, address))
++    return;
++
++  if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg))
++    type->mark_escape (escape_non_optimize, stmt);
++
++  /* Record it. */
++  type->add_access (new sraccess (stmt, node, type, field));
++}
++
++/* Find function corresponding to NODE.
*/
++
++srfunction *
++ipa_struct_reorg::find_function (cgraph_node *node)
++{
++  for (unsigned i = 0; i < functions.length (); i++)
++    if (functions[i]->node == node)
++      return functions[i];
++  return NULL;
++}
++
++/* Check NEWDECL's usage against TYPE: push it onto WORKLIST when it
++   needs further tracking, otherwise mark the involved types as
++   escaping.  STMT is the statement the usage came from. */
++
++void
++ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type,
++                                       vec<srdecl *> &worklist,
++                                       gimple *stmt)
++{
++  if (integer_zerop (newdecl))
++    return;
++
++  if (TREE_CODE (newdecl) == ADDR_EXPR)
++    {
++      srdecl *d = find_decl (TREE_OPERAND (newdecl, 0));
++      if (!d)
++        {
++          type->mark_escape (escape_cast_another_ptr, stmt);
++          return;
++        }
++      if (d->type == type)
++        return;
++
++      srtype *type1 = d->type;
++      type->mark_escape (escape_cast_another_ptr, stmt);
++      type1->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++
++  srdecl *d = find_decl (newdecl);
++  if (!d)
++    {
++      if (TREE_CODE (newdecl) == INTEGER_CST)
++        {
++          type->mark_escape (escape_int_const, stmt);
++          return;
++        }
++      /* If we have a non void* or a decl (which is hard to track),
++         then mark the type as escaping. */
++      if (!VOID_POINTER_P (TREE_TYPE (newdecl))
++          || DECL_P (newdecl))
++        {
++          if (dump_file && (dump_flags & TDF_DETAILS))
++            {
++              fprintf (dump_file, "\nunkown decl: ");
++              print_generic_expr (dump_file, newdecl);
++              fprintf (dump_file, " in type:\n");
++              print_generic_expr (dump_file, TREE_TYPE (newdecl));
++              fprintf (dump_file, "\n");
++            }
++          type->mark_escape (escape_cast_another_ptr, stmt);
++          return;
++        }
++      /* At this point there should only be unkown void* ssa names. */
++      gcc_assert (TREE_CODE (newdecl) == SSA_NAME);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nrecording unkown decl: ");
++          print_generic_expr (dump_file, newdecl);
++          fprintf (dump_file, " as type:\n");
++          type->simple_dump (dump_file);
++          fprintf (dump_file, "\n");
++        }
++      d = current_function->record_decl (type, newdecl, -1);
++      worklist.safe_push (d);
++      return;
++    }
++
++  /* Only add to the worklist if the decl is a SSA_NAME. */
++  if (TREE_CODE (newdecl) == SSA_NAME)
++    worklist.safe_push (d);
++  if (d->type == type)
++    return;
++
++  srtype *type1 = d->type;
++  type->mark_escape (escape_cast_another_ptr, stmt);
++  type1->mark_escape (escape_cast_another_ptr, stmt);
++}
++
++/*
++   2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl)
++      a) if the SSA_NAME is sourced from a pointer plus, record the pointer and
++         check to make sure the addition was a multiple of the size.
++         check the pointer type too.
++      b) If the name is sourced from an allocation check the allocation
++         i) Add SSA_NAME (void*) to the worklist if allocated from realloc
++      c) if the name is from a param, make sure the param type was of the
++         original type
++      d) if the name is from a cast/assignment, make sure it is used as that
++         type or void*
++         i) If void* then push the ssa_name into worklist
++*/
++void
++ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist)
++{
++  tree ssa_name = decl->decl;
++  srtype *type = decl->type;
++
++  /*
++     c) if the name is from a param, make sure the param type was of the
++        original type.
++  */
++  if (SSA_NAME_IS_DEFAULT_DEF (ssa_name))
++    {
++      tree var = SSA_NAME_VAR (ssa_name);
++      if (var
++          && TREE_CODE (var) == PARM_DECL
++          && VOID_POINTER_P (TREE_TYPE (ssa_name)))
++        type->mark_escape (escape_cast_void, NULL);
++      return;
++    }
++  gimple *stmt = SSA_NAME_DEF_STMT (ssa_name);
++
++  /*
++     b) If the name is sourced from an allocation check the allocation
++        i) Add SSA_NAME (void*) to the worklist if allocated from realloc
++  */
++  if (gimple_code (stmt) == GIMPLE_CALL)
++    {
++      /* For realloc, check the type of the argument. */
++      if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++        check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt);
++
++      if (!handled_allocation_stmt (stmt)
++          || !allocate_size (type, stmt))
++        type->mark_escape (escape_return, stmt);
++      return;
++    }
++  /* If the SSA_NAME is sourced from an inline-asm,
++     just mark the type as escaping. */
++  if (gimple_code (stmt) == GIMPLE_ASM)
++    {
++      type->mark_escape (escape_inline_asm, stmt);
++      return;
++    }
++
++  /* If the SSA_NAME is sourced from a PHI check add
++     each name to the worklist and check to make sure
++     they are used correctly. */
++  if (gimple_code (stmt) == GIMPLE_PHI)
++    {
++      for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
++        check_type_and_push (gimple_phi_arg_def (stmt, i),
++                             type, worklist, stmt);
++      return;
++    }
++
++  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
++  /*
++     a) if the SSA_NAME is sourced from a pointer plus, record the pointer and
++        check to make sure the addition was a multiple of the size.
++        check the pointer type too.
++  */
++
++  tree rhs = gimple_assign_rhs1 (stmt);
++  if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree num;
++      if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type)))
++        type->mark_escape (escape_non_multiply_size, stmt);
++
++      if (TREE_CODE (rhs) == SSA_NAME)
++        check_type_and_push (rhs, type, worklist, stmt);
++      return;
++    }
++
++  /* Casts between pointers and integer are escaping. */
++  if (gimple_assign_cast_p (stmt))
++    {
++      type->mark_escape (escape_cast_int, stmt);
++      return;
++    }
++
++  /*
++     d) if the name is from a cast/assignment, make sure it is used as that
++        type or void*
++        i) If void* then push the ssa_name into worklist
++  */
++  gcc_assert (gimple_assign_single_p (stmt));
++  check_other_side (decl, rhs, stmt, worklist);
++}
++
++/* Mark the types used by the inline-asm as escaping.
++   It is unkown what happens inside an inline-asm.
*/
++
++void
++ipa_struct_reorg::mark_types_asm (gasm *astmt)
++{
++  for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++)
++    {
++      tree v = TREE_VALUE (gimple_asm_input_op (astmt, i));
++      /* If we have &b, just strip the & here. */
++      if (TREE_CODE (v) == ADDR_EXPR)
++        v = TREE_OPERAND (v, 0);
++      mark_expr_escape (v, escape_inline_asm, astmt);
++    }
++  for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++)
++    {
++      tree v = TREE_VALUE (gimple_asm_output_op (astmt, i));
++      /* If we have &b, just strip the & here. */
++      if (TREE_CODE (v) == ADDR_EXPR)
++        v = TREE_OPERAND (v, 0);
++      mark_expr_escape (v, escape_inline_asm, astmt);
++    }
++}
++
++/* Check the other side OTHER of an assignment/copy involving DECL,
++   pushing tracked names onto WORKLIST and marking escapes when the
++   two sides disagree on type. */
++
++void
++ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt,
++                                    vec<srdecl *> &worklist)
++{
++  srtype *type = decl->type;
++
++  if (TREE_CODE (other) == SSA_NAME
++      || DECL_P (other)
++      || TREE_CODE (other) == INTEGER_CST)
++    {
++      check_type_and_push (other, type, worklist, stmt);
++      return;
++    }
++
++  tree t = TREE_TYPE (other);
++  if (!handled_type (t))
++    {
++      type->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++
++  srtype *t1 = find_type (inner_type (t));
++  if (t1 == type)
++    {
++      tree base;
++      bool indirect;
++      srtype *type1;
++      srfield *field;
++      bool realpart, imagpart, address;
++      if (!get_type_field (other, base, indirect, type1, field,
++                           realpart, imagpart, address))
++        type->mark_escape (escape_cast_another_ptr, stmt);
++
++      return;
++    }
++
++  if (t1)
++    t1->mark_escape (escape_cast_another_ptr, stmt);
++
++  type->mark_escape (escape_cast_another_ptr, stmt);
++}
++
++/* Check one use of DECL in STMT, pushing further names to track onto
++   WORKLIST and marking escapes for uses that cannot be followed. */
++
++void
++ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt,
++                             vec<srdecl *> &worklist)
++{
++  srtype *type = decl->type;
++
++  if (gimple_code (stmt) == GIMPLE_RETURN)
++    {
++      type->mark_escape (escape_return, stmt);
++      return;
++    }
++  /* If the SSA_NAME PHI check and add the src to the worklist and
++     check to make sure they are used correctly. */
++  if (gimple_code (stmt) == GIMPLE_PHI)
++    {
++      check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt);
++      return;
++    }
++
++  if (gimple_code (stmt) == GIMPLE_ASM)
++    {
++      mark_types_asm (as_a <gasm *> (stmt));
++      return;
++    }
++
++  if (gimple_code (stmt) == GIMPLE_COND)
++    {
++      tree rhs1 = gimple_cond_lhs (stmt);
++      tree rhs2 = gimple_cond_rhs (stmt);
++      tree orhs = rhs1;
++      if (gimple_cond_code (stmt) != EQ_EXPR
++          && gimple_cond_code (stmt) != NE_EXPR)
++        {
++          mark_expr_escape (rhs1, escape_non_eq, stmt);
++          mark_expr_escape (rhs2, escape_non_eq, stmt);
++        }
++      if (rhs1 == decl->decl)
++        orhs = rhs2;
++      if (integer_zerop (orhs))
++        return;
++      if (TREE_CODE (orhs) != SSA_NAME)
++        mark_expr_escape (rhs1, escape_non_eq, stmt);
++      check_type_and_push (orhs, type, worklist, stmt);
++      return;
++    }
++
++  /* Casts between pointers and integer are escaping. */
++  if (gimple_assign_cast_p (stmt))
++    {
++      type->mark_escape (escape_cast_int, stmt);
++      return;
++    }
++
++  /* We might have a_1 = ptr_2 == ptr_3; */
++  if (is_gimple_assign (stmt)
++      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
++    {
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree orhs = rhs1;
++      if (gimple_assign_rhs_code (stmt) != EQ_EXPR
++          && gimple_assign_rhs_code (stmt) != NE_EXPR)
++        {
++          mark_expr_escape (rhs1, escape_non_eq, stmt);
++          mark_expr_escape (rhs2, escape_non_eq, stmt);
++        }
++      if (rhs1 == decl->decl)
++        orhs = rhs2;
++      if (integer_zerop (orhs))
++        return;
++      if (TREE_CODE (orhs) != SSA_NAME)
++        mark_expr_escape (rhs1, escape_non_eq, stmt);
++      check_type_and_push (orhs, type, worklist, stmt);
++      return;
++    }
++
++  if (gimple_assign_single_p (stmt))
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs = gimple_assign_rhs1 (stmt);
++      /* Check if we have a_1 = b_2; that a_1 is in the correct type. */
++      if (decl->decl == rhs)
++        {
++          check_other_side (decl, lhs, stmt, worklist);
++          return;
++        }
++    }
++
++  if (is_gimple_assign (stmt)
++      && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree lhs = gimple_assign_lhs (stmt);
++      tree num;
++      check_other_side (decl, lhs, stmt, worklist);
++      if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type)))
++        type->mark_escape (escape_non_multiply_size, stmt);
++    }
++}
++
++/*
++   2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl)
++      d) if the name is from a cast/assignment, make sure it is used as that
++         type or void*
++         i) If void* then push the ssa_name into worklist
++      e) if used in conditional check the other side
++         i) If the conditional is non NE/EQ then mark the type as non rejecting
++      f) Check if the use in a Pointer PLUS EXPR Is used by mulitplication
++         of its size
++*/
++void
++ipa_struct_reorg::check_uses (srdecl *decl, vec<srdecl *> &worklist)
++{
++  tree ssa_name = decl->decl;
++  imm_use_iterator imm_iter;
++  use_operand_p use_p;
++
++  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
++    {
++      gimple *stmt = USE_STMT (use_p);
++
++      if (is_gimple_debug (stmt))
++        continue;
++
++      check_use (decl, stmt, worklist);
++    }
++}
++
++/* Record function corresponding to NODE. */
++
++srfunction *
++ipa_struct_reorg::record_function (cgraph_node *node)
++{
++  function *fn;
++  tree parm, var;
++  unsigned int i;
++  srfunction *sfn;
++  escape_type escapes = does_not_escape;
++
++  sfn = new srfunction (node);
++  functions.safe_push (sfn);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file,
++             "\nRecording accesses and types from function: %s/%u\n",
++             node->name (), node->order);
++
++  /* Nodes without a body are not interesting.  Especially do not
++     visit clones at this point for now - we get duplicate decls
++     there for inline clones at least. */
++  if (!node->has_gimple_body_p () || node->inlined_to)
++    return sfn;
++
++  node->get_body ();
++  fn = DECL_STRUCT_FUNCTION (node->decl);
++
++  if (!fn)
++    return sfn;
++
++  current_function = sfn;
++
++  if (DECL_PRESERVE_P (node->decl))
++    escapes = escape_marked_as_used;
++  else if (!node->local)
++    escapes = escape_visible_function;
++  else if (!node->can_change_signature)
++    escapes = escape_cannot_change_signature;
++  else if (!tree_versionable_function_p (node->decl))
++    escapes = escape_noclonable_function;
++  else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg))
++    escapes = escape_non_optimize;
++
++  basic_block bb;
++  gimple_stmt_iterator si;
++
++  /* Record the static chain decl. */
++  if (fn->static_chain_decl)
++    {
++      srdecl *sd = record_var (fn->static_chain_decl,
++                               escapes, -2);
++      if (sd)
++        {
++          /* Specify that this type is used by the static
++             chain so it cannot be split. */
++          sd->type->chain_type = true;
++          sfn->add_arg (sd);
++          sd->type->add_function (sfn);
++        }
++    }
++
++  /* Record the arguments. */
++  for (parm = DECL_ARGUMENTS (node->decl), i = 0;
++       parm;
++       parm = DECL_CHAIN (parm), i++)
++    {
++      srdecl *sd = record_var (parm, escapes, i);
++      if (sd)
++        {
++          sfn->add_arg (sd);
++          sd->type->add_function (sfn);
++        }
++    }
++
++  /* Mark the return type as escaping. */
++  {
++    tree return_type = TREE_TYPE (TREE_TYPE (node->decl));
++    mark_type_as_escape (return_type, escape_return, NULL);
++  }
++
++  /* If the cfg does not exist for the function,
++     don't process the function. */
++  if (!fn->cfg)
++    {
++      current_function = NULL;
++      return sfn;
++    }
++
++  /* The following order is done for recording stage:
++     0) Record all variables/SSA_NAMES that are of struct type
++     1) Record MEM_REF/COMPONENT_REFs
++        a) Record SSA_NAMEs (void*) and record that as the accessed type.
++  */
++
++  push_cfun (fn);
++
++  FOR_EACH_LOCAL_DECL (cfun, i, var)
++    {
++      if (TREE_CODE (var) != VAR_DECL)
++        continue;
++
++      record_var (var);
++    }
++
++  for (i = 1; i < num_ssa_names; ++i)
++    {
++      tree name = ssa_name (i);
++      if (!name
++          || has_zero_uses (name)
++          || virtual_operand_p (name))
++        continue;
++
++      record_var (name);
++    }
++
++  /* Find the variables which are used via MEM_REF and are void* types. */
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
++        {
++          gimple *stmt = gsi_stmt (si);
++          find_vars (stmt);
++        }
++    }
++
++  auto_vec<srdecl *> worklist;
++  for (unsigned i = 0; i < current_function->decls.length (); i++)
++    {
++      srdecl *decl = current_function->decls[i];
++      if (TREE_CODE (decl->decl) == SSA_NAME)
++        {
++          decl->visited = false;
++          worklist.safe_push (decl);
++        }
++    }
++
++  /*
++     2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl)
++        a) if the SSA_NAME is sourced from a pointer plus, record the pointer and
++           check to make sure the addition was a multiple of the size.
++           check the pointer type too.
++        b) If the name is sourced from an allocation check the allocation
++           i) Add SSA_NAME (void*) to the worklist if allocated from realloc
++        c) if the name is from a param, make sure the param type was of the
++           original type
++        d) if the name is used in a cast/assignment, make sure it is used as that
++           type or void*
++           i) If void* then push the ssa_name into worklist
++        e) if used in conditional check the other side
++           i) If the conditional is non NE/EQ then mark the type as non rejecting
++        f) Check if the use in a POinter PLUS EXPR Is used by mulitplication
++           of its size
++  */
++
++  while (!worklist.is_empty ())
++    {
++      srdecl *decl = worklist.pop ();
++      if (decl->visited)
++        continue;
++      decl->visited = true;
++      check_definition (decl, worklist);
++      check_uses (decl, worklist);
++    }
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
++        {
++          gimple *stmt = gsi_stmt (si);
++          maybe_record_stmt (node, stmt);
++        }
++    }
++
++  pop_cfun ();
++  current_function = NULL;
++  return sfn;
++}
++
++/* Record all accesses for all types including global variables. */
++
++void
++ipa_struct_reorg::record_accesses (void)
++{
++  varpool_node *var;
++  cgraph_node *cnode;
++
++  /* Record global (non-auto) variables first. */
++  FOR_EACH_VARIABLE (var)
++    {
++      if (!var->real_symbol_p ())
++        continue;
++
++      /* Record all variables including the accesses inside a variable. */
++      escape_type escapes = does_not_escape;
++      if (var->externally_visible || !var->definition)
++        escapes = escape_via_global_var;
++      if (var->in_other_partition)
++        escapes = escape_via_global_var;
++      if (!var->externally_visible && var->definition)
++        var->get_constructor ();
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "Recording global variable: ");
++          print_generic_expr (dump_file, var->decl);
++          fprintf (dump_file, "\n");
++        }
++      record_var (var->decl, escapes);
++    }
++
++  FOR_EACH_FUNCTION (cnode)
++    {
++      if (!cnode->real_symbol_p ())
++        continue;
++
++      /* Record accesses inside a function. */
++      if (cnode->definition)
++        record_function (cnode);
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "all types (before pruning):\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "all functions (before pruning):\n");
++      dump_functions (dump_file);
++    }
++  done_recording = true;
++}
++
++/* A helper function to detect cycles (recusive) types.
++   Return TRUE if TYPE was a rescusive type. */
++
++bool
++ipa_struct_reorg::walk_field_for_cycles (srtype *type)
++{
++  unsigned i;
++  srfield *field;
++
++  type->visited = true;
++  if (type->escaped_rescusive ())
++    return true;
++
++  if (type->has_escaped ())
++    return false;
++
++  FOR_EACH_VEC_ELT (type->fields, i, field)
++    {
++      if (!field->type)
++        ;
++      else if (field->type->visited
++               || walk_field_for_cycles (field->type))
++        {
++          type->mark_escape (escape_rescusive_type, NULL);
++          return true;
++        }
++    }
++
++  return false;
++}
++
++/* Clear visited on all types. */
++
++void
++ipa_struct_reorg::clear_visited (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    types[i]->visited = false;
++}
++
++/* Detect recusive types and mark them as escaping.
*/
++
++void
++ipa_struct_reorg::detect_cycles (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      if (types[i]->has_escaped ())
++        continue;
++
++      clear_visited ();
++      walk_field_for_cycles (types[i]);
++    }
++}
++
++/* Propagate escaping to depdenent types. */
++
++void
++ipa_struct_reorg::propagate_escape (void)
++{
++  unsigned i;
++  srtype *type;
++  bool changed = false;
++
++  do
++    {
++      changed = false;
++      FOR_EACH_VEC_ELT (types, i, type)
++        {
++          for (tree field = TYPE_FIELDS (type->type);
++               field;
++               field = DECL_CHAIN (field))
++            {
++              if (TREE_CODE (field) == FIELD_DECL
++                  && handled_type (TREE_TYPE (field)))
++                {
++                  tree t = inner_type (TREE_TYPE (field));
++                  srtype *type1 = find_type (t);
++                  if (!type1)
++                    continue;
++                  if (type1->has_escaped ()
++                      && !type->has_escaped ())
++                    {
++                      type->mark_escape (escape_dependent_type_escapes, NULL);
++                      changed = true;
++                    }
++                  if (type->has_escaped ()
++                      && !type1->has_escaped ())
++                    {
++                      type1->mark_escape (escape_dependent_type_escapes, NULL);
++                      changed = true;
++                    }
++                }
++            }
++        }
++    } while (changed);
++}
++
++/* Prune the escaped types and their decls from what was recorded. */
++
++void
++ipa_struct_reorg::prune_escaped_types (void)
++{
++  detect_cycles ();
++  propagate_escape ();
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "all types (after prop but before pruning):\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "all functions (after prop but before pruning):\n");
++      dump_functions (dump_file);
++    }
++
++  if (dump_file)
++    dump_types_escaped (dump_file);
++
++  /* Prune the function arguments which escape
++     and functions which have no types as arguments. */
++  for (unsigned i = 0; i < functions.length ();)
++    {
++      srfunction *function = functions[i];
++
++      /* Prune function arguments of types that escape. */
++      for (unsigned j = 0; j < function->args.length ();)
++        {
++          if (function->args[j]->type->has_escaped ())
++            function->args.ordered_remove (j);
++          else
++            j++;
++        }
++
++      /* Prune global variables that the function uses of types
++         that escape. */
++      for (unsigned j = 0; j < function->globals.length ();)
++        {
++          if (function->globals[j]->type->has_escaped ())
++            function->globals.ordered_remove (j);
++          else
++            j++;
++        }
++
++      /* Prune variables that the function uses of types that escape. */
++      for (unsigned j = 0; j < function->decls.length ();)
++        {
++          srdecl *decl = function->decls[j];
++          if (decl->type->has_escaped ())
++            {
++              function->decls.ordered_remove (j);
++              delete decl;
++            }
++          else
++            j++;
++        }
++
++      /* Prune functions which don't refer to any variables any more. */
++      if (function->args.is_empty ()
++          && function->decls.is_empty ()
++          && function->globals.is_empty ())
++        {
++          delete function;
++          functions.ordered_remove (i);
++        }
++      else
++        i++;
++    }
++
++  /* Prune globals of types that escape, all references to those decls
++     will have been removed in the first loop. */
++  for (unsigned j = 0; j < globals.decls.length ();)
++    {
++      srdecl *decl = globals.decls[j];
++      if (decl->type->has_escaped ())
++        {
++          globals.decls.ordered_remove (j);
++          delete decl;
++        }
++      else
++        j++;
++    }
++
++  /* Prune types that escape, all references to those types
++     will have been removed in the above loops. */
++  for (unsigned i = 0; i < types.length ();)
++    {
++      srtype *type = types[i];
++      if (type->has_escaped ())
++        {
++          /* All references to this type should have been removed now. */
++          delete type;
++          types.ordered_remove (i);
++        }
++      else
++        i++;
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "all types (after pruning):\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "all functions (after pruning):\n");
++      dump_functions (dump_file);
++    }
++}
++
++/* Analyze all of the types.
*/
++
++void
++ipa_struct_reorg::analyze_types (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      if (!types[i]->has_escaped ())
++        types[i]->analyze ();
++    }
++}
++
++/* When struct A has a struct B member, B's type info
++   is not stored in
++   TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA)))
++   Try to restore B's type information. */
++
++void
++ipa_struct_reorg::restore_field_type (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      for (unsigned j = 0; j < types[i]->fields.length (); j++)
++        {
++          srfield *field = types[i]->fields[j];
++          if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE)
++            {
++              /* If field type has TYPE_FIELDS information,
++                 we do not need to do this. */
++              if (TYPE_FIELDS (field->type->type) != NULL)
++                continue;
++              for (unsigned k = 0; k < types.length (); k++)
++                {
++                  if (i == k)
++                    continue;
++                  const char *type1 = get_type_name (field->type->type);
++                  const char *type2 = get_type_name (types[k]->type);
++                  if (type1 == NULL || type2 == NULL)
++                    continue;
++                  if (type1 == type2
++                      && TYPE_FIELDS (types[k]->type))
++                    field->type = types[k];
++                }
++            }
++        }
++    }
++}
++
++/* Create all new types we want to create. */
++
++bool
++ipa_struct_reorg::create_new_types (void)
++{
++  int newtypes = 0;
++  clear_visited ();
++  for (unsigned i = 0; i < types.length (); i++)
++    newtypes += types[i]->create_new_type ();
++
++  if (dump_file)
++    {
++      if (newtypes)
++        fprintf (dump_file, "\nNumber of structures to transform is %d\n",
++                 newtypes);
++      else
++        fprintf (dump_file, "\nNo structures to transform.\n");
++    }
++
++  return newtypes != 0;
++}
++
++/* Create all the new decls except for the new arguments
++   which create_new_functions would have created. */
++
++void
++ipa_struct_reorg::create_new_decls (void)
++{
++  globals.create_new_decls ();
++  for (unsigned i = 0; i < functions.length (); i++)
++    functions[i]->create_new_decls ();
++}
++
++/* Create the new arguments for the function corresponding to NODE.
*/
++
++void
++ipa_struct_reorg::create_new_args (cgraph_node *new_node)
++{
++  tree decl = new_node->decl;
++  auto_vec<tree> params;
++  push_function_arg_decls (&params, decl);
++  vec<ipa_adjusted_param, va_gc> *adjs = NULL;
++  vec_safe_reserve (adjs, params.length ());
++  for (unsigned i = 0; i < params.length (); i++)
++    {
++      struct ipa_adjusted_param adj;
++      tree parm = params[i];
++      memset (&adj, 0, sizeof (adj));
++      adj.base_index = i;
++      adj.prev_clone_index = i;
++      srtype *t = find_type (inner_type (TREE_TYPE (parm)));
++      if (!t
++          || t->has_escaped ()
++          || !t->has_new_type ())
++        {
++          adj.op = IPA_PARAM_OP_COPY;
++          vec_safe_push (adjs, adj);
++          continue;
++        }
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "Creating a new argument for: ");
++          print_generic_expr (dump_file, params[i]);
++          fprintf (dump_file, " in function: ");
++          print_generic_expr (dump_file, decl);
++          fprintf (dump_file, "\n");
++        }
++      adj.op = IPA_PARAM_OP_NEW;
++      adj.param_prefix_index = IPA_PARAM_PREFIX_REORG;
++      for (unsigned j = 0; j < max_split && t->newtype[j]; j++)
++        {
++          adj.type = reconstruct_complex_type (TREE_TYPE (parm),
++                                               t->newtype[j]);
++          vec_safe_push (adjs, adj);
++        }
++    }
++  ipa_param_body_adjustments *adjustments
++    = new ipa_param_body_adjustments (adjs, decl);
++  adjustments->modify_formal_parameters ();
++  auto_vec<tree> new_params;
++  push_function_arg_decls (&new_params, decl);
++  unsigned veclen = vec_safe_length (adjs);
++  for (unsigned i = 0; i < veclen; i++)
++    {
++      if ((*adjs)[i].op != IPA_PARAM_OP_NEW)
++        continue;
++      tree decl = params[(*adjs)[i].base_index];
++      srdecl *d = find_decl (decl);
++      if (!d)
++        continue;
++      unsigned j = 0;
++      while (j < max_split && d->newdecl[j])
++        j++;
++      d->newdecl[j] = new_params[i];
++    }
++
++  function *fn = DECL_STRUCT_FUNCTION (decl);
++
++  if (!fn->static_chain_decl)
++    return;
++  srdecl *chain = find_decl (fn->static_chain_decl);
++  if (!chain)
++    return;
++
++  srtype *type = chain->type;
++  tree orig_var = chain->decl;
++  const char *tname = NULL;
++  if (DECL_NAME (orig_var))
++    tname = IDENTIFIER_POINTER (DECL_NAME (orig_var));
++  gcc_assert (!type->newtype[1]);
++  tree new_name = NULL;
++  char *name = NULL;
++  if (tname)
++    {
++      name = concat (tname, ".reorg.0", NULL);
++      new_name = get_identifier (name);
++      free (name);
++    }
++  tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var),
++                                            type->newtype[0]);
++  chain->newdecl[0] = build_decl (DECL_SOURCE_LOCATION (orig_var),
++                                  PARM_DECL, new_name, newtype1);
++  copy_var_attributes (chain->newdecl[0], orig_var);
++  fn->static_chain_decl = chain->newdecl[0];
++}
++
++/* Find the refered DECL in the current function or globals.
++   If this is a global decl, record that as being used
++   in the current function. */
++
++srdecl *
++ipa_struct_reorg::find_decl (tree decl)
++{
++  srdecl *d;
++  d = globals.find_decl (decl);
++  if (d)
++    {
++      /* Record the global usage in the current function. */
++      if (!done_recording && current_function)
++        {
++          bool add = true;
++          /* No reason to add it to the current function if it is
++             already recorded as such. */
++          for (unsigned i = 0; i < current_function->globals.length (); i++)
++            {
++              if (current_function->globals[i] == d)
++                {
++                  add = false;
++                  break;
++                }
++            }
++          if (add)
++            current_function->globals.safe_push (d);
++        }
++      return d;
++    }
++  if (current_function)
++    return current_function->find_decl (decl);
++  return NULL;
++}
++
++/* Create new function clones for the cases where the arguments
++   need to be changed.
*/ ++ ++void ++ipa_struct_reorg::create_new_functions (void) ++{ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functionsi; ++ bool anyargchanges = false; ++ cgraph_node *new_node; ++ cgraph_node *node = f->node; ++ int newargs = 0; ++ if (f->old) ++ continue; ++ ++ if (f->args.length () == 0) ++ continue; ++ ++ for (unsigned j = 0; j < f->args.length (); j++) ++ { ++ srdecl *d = f->argsj; ++ srtype *t = d->type; ++ if (t->has_new_type ()) ++ { ++ newargs += t->newtype1 != NULL; ++ anyargchanges = true; ++ } ++ } ++ if (!anyargchanges) ++ continue; ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Creating a clone of function: "); ++ f->simple_dump (dump_file); ++ fprintf (dump_file, "\n"); ++ } ++ statistics_counter_event (NULL, "Create new function", 1); ++ new_node = node->create_version_clone_with_body (vNULL, NULL, ++ NULL, NULL, NULL, ++ "struct_reorg"); ++ new_node->can_change_signature = node->can_change_signature; ++ new_node->make_local (); ++ f->newnode = new_node; ++ srfunction *n = record_function (new_node); ++ current_function = n; ++ n->old = f; ++ f->newf = n; ++ /* Create New arguments. */ ++ create_new_args (new_node); ++ current_function = NULL; ++ } ++} ++ ++bool ++ipa_struct_reorg::rewrite_lhs_rhs (tree lhs, tree rhs, ++ tree newlhsmax_split, ++ tree newrhsmax_split) ++{ ++ bool l = rewrite_expr (lhs, newlhs); ++ bool r = rewrite_expr (rhs, newrhs); ++ ++ /* Handle NULL pointer specially. 
*/ ++ if (l && !r && integer_zerop (rhs)) ++ { ++ r = true; ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ newrhsi = fold_convert (TREE_TYPE (newlhsi), rhs); ++ } ++ ++ return l || r; ++} ++ ++bool ++ipa_struct_reorg::rewrite_expr (tree expr, ++ tree newexprmax_split, ++ bool ignore_missing_decl) ++{ ++ tree base; ++ bool indirect; ++ srtype *t; ++ srfield *f; ++ bool realpart, imagpart; ++ bool address; ++ ++ tree newbasemax_split; ++ memset (newexpr, 0, sizeof (treemax_split)); ++ ++ if (TREE_CODE (expr) == CONSTRUCTOR) ++ { ++ srtype *t = find_type (TREE_TYPE (expr)); ++ if (!t) ++ return false; ++ gcc_assert (CONSTRUCTOR_NELTS (expr) == 0); ++ if (!t->has_new_type ()) ++ return false; ++ for (unsigned i = 0; i < max_split && t->newtypei; i++) ++ newexpri = build_constructor (t->newtypei, NULL); ++ return true; ++ } ++ ++ if (!get_type_field (expr, base, indirect, t, f, ++ realpart, imagpart, address)) ++ return false; ++ ++ /* If the type is not changed, then just return false. */ ++ if (!t->has_new_type ()) ++ return false; ++ ++ /* NULL pointer handling is "special". 
*/ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (indirect && !address); ++ for (unsigned i = 0; i < max_split && t->newtypei; i++) ++ { ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (base), ++ t->newtypei); ++ newbasei = fold_convert (newtype1, base); ++ } ++ } ++ else ++ { ++ srdecl *d = find_decl (base); ++ ++ if (!d && dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Can't find decl:\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\ntype:\n"); ++ t->dump (dump_file); ++ } ++ if (!d && ignore_missing_decl) ++ return true; ++ gcc_assert (d); ++ memcpy (newbase, d->newdecl, sizeof (d->newdecl)); ++ } ++ ++ if (f == NULL) ++ { ++ memcpy (newexpr, newbase, sizeof (newbase)); ++ for (unsigned i = 0; i < max_split && newexpri; i++) ++ { ++ if (address) ++ newexpri = build_fold_addr_expr (newexpri); ++ if (indirect) ++ newexpri = build_simple_mem_ref (newexpri); ++ if (imagpart) ++ newexpri = build1 (IMAGPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpri)), ++ newexpri); ++ if (realpart) ++ newexpri = build1 (REALPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpri)), ++ newexpri); ++ } ++ return true; ++ } ++ ++ tree newdecl = newbasef->clusternum; ++ for (unsigned i = 0; i < max_split && f->newfieldi; i++) ++ { ++ tree newbase1 = newdecl; ++ if (address) ++ newbase1 = build_fold_addr_expr (newbase1); ++ if (indirect) ++ newbase1 = build_simple_mem_ref (newbase1); ++ newexpri = build3 (COMPONENT_REF, TREE_TYPE (f->newfieldi), ++ newbase1, f->newfieldi, NULL_TREE); ++ if (imagpart) ++ newexpri = build1 (IMAGPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpri)), ++ newexpri); ++ if (realpart) ++ newexpri = build1 (REALPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpri)), ++ newexpri); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "cluster: %d. 
decl = ", (int)f->clusternum); ++ print_generic_expr (dump_file, newbase1); ++ fprintf (dump_file, "\nnewexpr = "); ++ print_generic_expr (dump_file, newexpri); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ return true; ++} ++ ++bool ++ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ bool remove = false; ++ if (gimple_clobber_p (stmt)) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree newlhsmax_split; ++ if (!rewrite_expr (lhs, newlhs)) ++ return false; ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ { ++ tree clobber = build_constructor (TREE_TYPE (newlhsi), NULL); ++ TREE_THIS_VOLATILE (clobber) = true; ++ gimple *newstmt = gimple_build_assign (newlhsi, clobber); ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1max_split; ++ tree newrhs2max_split; ++ tree_code rhs_code = gimple_assign_rhs_code (stmt); ++ tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ return false; ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1i; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, ++ newrhs1i, newrhs2i); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, ++ newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ newexpr = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), ++ newexpr); ++ gimple_assign_set_rhs_from_tree (gsi, newexpr); ++ update_stmt (stmt); ++ } ++ return false; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newlhsmax_split; ++ tree newrhsmax_split; ++ ++ if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs)) ++ return false; ++ tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); ++ tree num; ++ /* Check if rhs2 is a multiplication of the size of the type. 
*/ ++ if (!is_result_of_mult (rhs2, &num, size)) ++ internal_error ( ++ "The rhs of pointer is not a multiplicate and it slips through"); ++ ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ { ++ gimple *new_stmt; ++ ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhsi))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ new_stmt = gimple_build_assign (newlhsi, POINTER_PLUS_EXPR, ++ newrhsi, newsize); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "rewriting statement:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ tree newlhsmax_split; ++ tree newrhsmax_split; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nreplaced with:\n"); ++ for (unsigned i = 0; i < max_split && (newlhsi || newrhsi); i++) ++ { ++ gimple *newstmt = gimple_build_assign (newlhsi ? newlhsi : lhs, ++ newrhsi ? newrhsi : rhs); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, newstmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite function call statement STMT. Return TRUE if the statement ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ /* Handled allocation calls are handled seperately from normal ++ function calls. 
*/ ++ if (handled_allocation_stmt (stmt)) ++ { ++ tree lhs = gimple_call_lhs (stmt); ++ tree newrhs1max_split; ++ srdecl *decl = find_decl (lhs); ++ if (!decl || !decl->type) ++ return false; ++ srtype *type = decl->type; ++ tree num = allocate_size (type, stmt); ++ gcc_assert (num); ++ memset (newrhs1, 0, sizeof (newrhs1)); ++ ++ /* The realloc call needs to have its first argument rewritten. */ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ { ++ tree rhs1 = gimple_call_arg (stmt, 0); ++ if (integer_zerop (rhs1)) ++ { ++ for (unsigned i = 0; i < max_split; i++) ++ newrhs1i = rhs1; ++ } ++ else if (!rewrite_expr (rhs1, newrhs1)) ++ internal_error ("Rewrite failed for realloc"); ++ } ++ ++ /* Go through each new lhs. */ ++ for (unsigned i = 0; i < max_split && decl->newdecli; i++) ++ { ++ tree newsize = TYPE_SIZE_UNIT (type->type); ++ gimple *g; ++ /* Every allocation except for calloc needs ++ the size multiplied out. */ ++ if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, num, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, newrhs1i, newsize); ++ else ++ gcc_assert (false); ++ gimple_call_set_lhs (g, decl->newdecli); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* The function call free needs to be handled special. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexprmax_split; ++ if (!rewrite_expr (expr, newexpr)) ++ return false; ++ ++ if (newexpr1 == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr0); ++ update_stmt (stmt); ++ return false; ++ } ++ ++ for (unsigned i = 0; i < max_split && newexpri; i++) ++ { ++ gimple *g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newexpri); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* Otherwise, look up the function to see if we have cloned it ++ and rewrite the arguments. */ ++ tree fndecl = gimple_call_fndecl (stmt); ++ ++ /* Indirect calls are already marked as escaping so ignore. */ ++ if (!fndecl) ++ return false; ++ ++ cgraph_node *node = cgraph_node::get (fndecl); ++ gcc_assert (node); ++ srfunction *f = find_function (node); ++ ++ /* Did not find the function or had not cloned it return saying don't ++ change the function call. */ ++ if (!f || !f->newf) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Changing arguments for function call :\n"); ++ print_gimple_expr (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ /* Move over to the new function. */ ++ f = f->newf; ++ ++ tree chain = gimple_call_chain (stmt); ++ unsigned nargs = gimple_call_num_args (stmt); ++ auto_vec<tree> vargs (nargs); ++ ++ if (chain) ++ { ++ tree newchainsmax_split; ++ if (rewrite_expr (chain, newchains)) ++ { ++ /* Chain decl's type cannot be split and but it can change. 
*/ ++ gcc_assert (newchains1 == NULL); ++ chain = newchains0; ++ } ++ } ++ ++ for (unsigned i = 0; i < nargs; i++) ++ vargs.quick_push (gimple_call_arg (stmt, i)); ++ ++ int extraargs = 0; ++ ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->argsi; ++ if (d->argumentnum == -2) ++ continue; ++ gcc_assert (d->argumentnum != -1); ++ tree arg = vargsd->argumentnum + extraargs; ++ tree newargsmax_split; ++ if (!rewrite_expr (arg, newargs)) ++ continue; ++ ++ /* If this ARG has a replacement handle the replacement. */ ++ for (unsigned j = 0; j < max_split && d->newdeclj; j++) ++ { ++ gcc_assert (newargsj); ++ /* If this is the first replacement of the arugment, ++ then just replace it. */ ++ if (j == 0) ++ vargsd->argumentnum + extraargs = newargsj; ++ else ++ { ++ /* More than one replacement, ++ we need to insert into the array. */ ++ extraargs++; ++ vargs.safe_insert (d->argumentnum + extraargs, newargsj); ++ } ++ } ++ } ++ ++ gcall *new_stmt; ++ ++ new_stmt = gimple_build_call_vec (f->node->decl, vargs); ++ ++ if (gimple_call_lhs (stmt)) ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ ++ if (gimple_has_location (stmt)) ++ gimple_set_location (new_stmt, gimple_location (stmt)); ++ gimple_call_copy_flags (new_stmt, stmt); ++ gimple_call_set_chain (new_stmt, chain); ++ ++ gimple_set_modified (new_stmt, true); ++ ++ if (gimple_vdef (new_stmt) ++ && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) ++ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ ++ gsi_replace (gsi, new_stmt, false); ++ ++ /* We need to defer cleaning EH info on the new statement to ++ fixup-cfg. We may not have dominator information at this point ++ and thus would end up with unreachable blocks and have no way ++ to communicate that we need to run CFG cleanup then. 
*/ ++ int lp_nr = lookup_stmt_eh_lp (stmt); ++ if (lp_nr != 0) ++ { ++ remove_stmt_from_eh_lp (stmt); ++ add_stmt_to_eh_lp (new_stmt, lp_nr); ++ } ++ ++ return false; ++} ++ ++/* Rewrite the conditional statement STMT. Return TRUE if the ++ old statement is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) ++{ ++ tree_code rhs_code = gimple_cond_code (stmt); ++ ++ /* Handle only equals or not equals conditionals. */ ++ if (rhs_code != EQ_EXPR ++ && rhs_code != NE_EXPR) ++ return false; ++ tree rhs1 = gimple_cond_lhs (stmt); ++ tree rhs2 = gimple_cond_rhs (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "COND: Rewriting\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs1); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs2); ++ fprintf (dump_file, "\n"); ++ } ++ ++ tree newrhs1max_split; ++ tree newrhs2max_split; ++ tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1i; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, ++ newrhs1i, newrhs2i); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, ++ newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ gimple_cond_set_lhs (stmt, newexpr); ++ gimple_cond_set_rhs (stmt, boolean_true_node); ++ update_stmt (stmt); ++ } ++ return false; ++} ++ ++/* Rewrite debug statments if possible. Return TRUE if the statement ++ should be removed. 
*/ ++ ++bool ++ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) ++{ ++ bool remove = false; ++ if (gimple_debug_bind_p (stmt)) ++ { ++ tree var = gimple_debug_bind_get_var (stmt); ++ tree newvarmax_split; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ if (gimple_debug_bind_has_value_p (stmt)) ++ { ++ var = gimple_debug_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ } ++ else if (gimple_debug_source_bind_p (stmt)) ++ { ++ tree var = gimple_debug_source_bind_get_var (stmt); ++ tree newvarmax_split; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ var = gimple_debug_source_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite PHI nodes, return true if the PHI was replaced. */ ++ ++bool ++ipa_struct_reorg::rewrite_phi (gphi *phi) ++{ ++ tree newlhsmax_split; ++ gphi *newphimax_split; ++ tree result = gimple_phi_result (phi); ++ gphi_iterator gsi; ++ ++ memset (newphi, 0, sizeof (newphi)); ++ ++ if (!rewrite_expr (result, newlhs)) ++ return false; ++ ++ if (newlhs0 == NULL) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nrewriting PHI:"); ++ print_gimple_stmt (dump_file, phi, 0); ++ } ++ ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ newphii = create_phi_node (newlhsi, gimple_bb (phi)); ++ ++ for (unsigned i = 0; i < gimple_phi_num_args (phi); i++) ++ { ++ tree newrhsmax_split; ++ phi_arg_d rhs = *gimple_phi_arg (phi, i); ++ rewrite_expr (rhs.def, newrhs); ++ for (unsigned j = 0; j < max_split && newlhsj; j++) ++ { ++ SET_PHI_ARG_DEF (newphij, i, newrhsj); ++ gimple_phi_arg_set_location (newphij, i, rhs.locus); ++ update_stmt (newphij); ++ } ++ } ++ ++ if (dump_file && (dump_flags & 
TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ninto\n:"); ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ { ++ print_gimple_stmt (dump_file, newphii, 0); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ gsi = gsi_for_phi (phi); ++ remove_phi_node (&gsi, false); ++ ++ return true; ++} ++ ++/* Rewrite gimple statement STMT, return true if the STATEMENT ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a <gassign *> (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a <gcall *> (stmt), gsi); ++ case GIMPLE_COND: ++ return rewrite_cond (as_a <gcond *> (stmt), gsi); ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ case GIMPLE_DEBUG: ++ case GIMPLE_ASM: ++ break; ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Does the function F uses any decl which has changed. */ ++ ++bool ++ipa_struct_reorg::has_rewritten_type (srfunction *f) ++{ ++ for (unsigned i = 0; i < f->decls.length (); i++) ++ { ++ srdecl *d = f->declsi; ++ if (d->newdecl0 != d->decl) ++ return true; ++ } ++ ++ for (unsigned i = 0; i < f->globals.length (); i++) ++ { ++ srdecl *d = f->globalsi; ++ if (d->newdecl0 != d->decl) ++ return true; ++ } ++ return false; ++} ++ ++/* Rewrite the functions if needed, return ++ the TODOs requested. */ ++ ++unsigned ++ipa_struct_reorg::rewrite_functions (void) ++{ ++ unsigned retval = 0; ++ ++ restore_field_type (); ++ /* Create new types, if we did not create any new types, ++ then don't rewrite any accesses. */ ++ if (!create_new_types ()) ++ return 0; ++ ++ if (functions.length ()) ++ { ++ retval = TODO_remove_functions; ++ create_new_functions (); ++ } ++ ++ create_new_decls (); ++ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functionsi; ++ if (f->newnode) ++ continue; ++ ++ /* Function uses no rewriten types so don't cause a rewrite. 
*/ ++ if (!has_rewritten_type (f)) ++ continue; ++ ++ cgraph_node *node = f->node; ++ basic_block bb; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ current_function = f; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nBefore rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);) ++ { ++ if (rewrite_phi (si.phi ())) ++ si = gsi_start_phis (bb); ++ else ++ gsi_next (&si); ++ } ++ ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Release the old SSA_NAMES for old arguments. 
*/ ++ if (f->old) ++ { ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->argsi; ++ if (d->newdecl0 != d->decl) ++ { ++ tree ssa_name = ssa_default_def (cfun, d->decl); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Found "); ++ print_generic_expr (dump_file, ssa_name); ++ fprintf (dump_file, " to be released.\n"); ++ } ++ release_ssa_name (ssa_name); ++ } ++ } ++ } ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ compute_may_aliases (); ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nAfter rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ ++ pop_cfun (); ++ current_function = NULL; ++ } ++ ++ return retval | TODO_verify_all; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (void) ++{ ++ /* FIXME: If there is a top-level inline-asm, ++ the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ return rewrite_functions (); ++} ++ ++const pass_data pass_data_ipa_struct_reorg = ++{ ++ SIMPLE_IPA_PASS, // type ++ "struct_reorg", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_IPA_STRUCT_REORG, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ 0, // todo_flags_finish ++}; ++ ++class pass_ipa_struct_reorg : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_struct_reorg (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_struct_reorg, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) ++ { ++ return ipa_struct_reorg ().execute (); ++ } ++ ++}; // class pass_ipa_struct_reorg ++ ++bool ++pass_ipa_struct_reorg::gate (function *) ++{ ++ return (optimize ++ && flag_ipa_struct_reorg ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error ()); ++} ++ ++} // anon namespace ++ ++ ++simple_ipa_opt_pass * ++make_pass_ipa_struct_reorg (gcc::context *ctxt) ++{ ++ return new pass_ipa_struct_reorg (ctxt); ++} +\ No newline at end of file +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +new file mode 100644 +index 000000000..a58794070 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -0,0 +1,235 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski <apinski@cavium.com> ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. 
++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef IPA_STRUCT_REORG_H ++#define IPA_STRUCT_REORG_H ++ ++namespace struct_reorg { ++ ++const int max_split = 2; ++ ++template <typename type> ++struct auto_vec_del : auto_vec<type *> ++{ ++ ~auto_vec_del (); ++}; ++ ++template <typename T> ++auto_vec_del<T>::~auto_vec_del (void) ++{ ++ unsigned i; ++ T *t; ++ FOR_EACH_VEC_ELT (*this, i, t) ++ { ++ delete t; ++ } ++} ++ ++enum escape_type ++{ ++ does_not_escape, ++#define DEF_ESCAPE(ENUM, TEXT) ENUM, ++#include "escapes.def" ++ escape_max_escape ++}; ++ ++const char *escape_type_stringescape_max_escape - 1 = ++{ ++#define DEF_ESCAPE(ENUM, TEXT) TEXT, ++#include "escapes.def" ++}; ++ ++struct srfield; ++struct srtype; ++struct sraccess; ++struct srdecl; ++struct srfunction; ++ ++struct srfunction ++{ ++ cgraph_node *node; ++ auto_vec<srdecl *> args; ++ auto_vec<srdecl *> globals; ++ auto_vec_del<srdecl> decls; ++ srdecl *record_decl (srtype *, tree, int arg); ++ ++ srfunction *old; ++ cgraph_node *newnode; ++ srfunction *newf; ++ ++ // Constructors ++ srfunction (cgraph_node *n); ++ ++ // Methods ++ void add_arg (srdecl *arg); ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ bool check_args (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++}; ++ ++struct srglobal : private srfunction ++{ ++ srglobal () ++ : srfunction (NULL) ++ {} ++ ++ using srfunction::dump; ++ using srfunction::create_new_decls; ++ using srfunction::find_decl; ++ using srfunction::record_decl; ++ using srfunction::decls; ++}; ++ ++struct srtype ++{ ++ tree type; ++ auto_vec_del<srfield> fields; ++ ++ // array of 
fields that use this type. ++ auto_vec<srfield *> field_sites; ++ ++ // array of functions which use directly the type ++ auto_vec<srfunction *> functions; ++ ++ auto_vec_del<sraccess> accesses; ++ bool chain_type; ++ ++private: ++ escape_type escapes; ++ ++public: ++ tree newtypemax_split; ++ bool visited; ++ ++ // Constructors ++ srtype (tree type); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ void add_function (srfunction *); ++ void add_access (sraccess *a) ++ { ++ accesses.safe_push (a); ++ } ++ void add_field_site (srfield *); ++ ++ srfield *find_field (unsigned HOST_WIDE_INT offset); ++ ++ bool create_new_type (void); ++ void analyze (void); ++ void mark_escape (escape_type, gimple *stmt); ++ bool has_escaped (void) ++ { ++ return escapes != does_not_escape; ++ } ++ const char *escape_reason (void) ++ { ++ if (!has_escaped ()) ++ return NULL; ++ return escape_type_stringescapes - 1; ++ } ++ bool escaped_rescusive (void) ++ { ++ return escapes == escape_rescusive_type; ++ } ++ bool has_new_type (void) ++ { ++ return newtype0 && newtype0 != type; ++ } ++}; ++ ++struct srfield ++{ ++ unsigned HOST_WIDE_INT offset; ++ tree fieldtype; ++ tree fielddecl; ++ srtype *base; ++ srtype *type; ++ ++ unsigned clusternum; ++ ++ tree newfieldmax_split; ++ ++ // Constructors ++ srfield (tree field, srtype *base); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ void create_new_fields (tree newtypemax_split, ++ tree newfieldsmax_split, ++ tree newlastmax_split); ++}; ++ ++struct sraccess ++{ ++ gimple *stmt; ++ cgraph_node *node; ++ ++ srtype *type; ++ // NULL field means the whole type is accessed ++ srfield *field; ++ ++ // Constructors ++ sraccess (gimple *s, cgraph_node *n, srtype *t, srfield *f = NULL) ++ : stmt (s), ++ node (n), ++ type (t), ++ field (f) ++ {} ++ ++ // Methods ++ void dump (FILE *file); ++}; ++ ++struct srdecl ++{ ++ srtype *type; ++ tree decl; ++ tree func; ++ /* -1 : not an 
argument ++ -2 : static chain ++ */ ++ int argumentnum; ++ ++ bool visited; ++ ++ tree newdeclmax_split; ++ ++ // Constructors ++ srdecl (srtype *type, tree decl, int argumentnum = -1); ++ ++ // Methods ++ void dump (FILE *file); ++ bool has_new_decl (void) ++ { ++ return newdecl0 && newdecl0 != decl; ++ } ++}; ++ ++ ++} // namespace struct_reorg ++ ++#endif +diff --git a/gcc/params.opt b/gcc/params.opt +index e0ff9e210..1ddf1343f 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -865,6 +865,10 @@ Enum(parloops_schedule_type) String(runtime) Value(PARLOOPS_SCHEDULE_RUNTIME) + Common Joined UInteger Var(param_partial_inlining_entry_probability) Init(70) Optimization IntegerRange(0, 100) Param + Maximum probability of the entry BB of split region (in percent relative to entry BB of the function) to make partial inlining happen. + ++-param=struct-reorg-cold-struct-ratio= ++Common Joined UInteger Var(param_struct_reorg_cold_struct_ratio) Init(10) IntegerRange(0, 100) Param Optimization ++The threshold ratio between current and hottest structure counts. ++ + -param=predictable-branch-outcome= + Common Joined UInteger Var(param_predictable_branch_outcome) Init(2) IntegerRange(0, 50) Param Optimization + Maximal estimated outcome of branch considered predictable. +diff --git a/gcc/passes.def b/gcc/passes.def +index 375d3d62d..1c1658c4a 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -177,6 +177,8 @@ along with GCC; see the file COPYING3. If not see + compiled unit. */ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_ipa_pta); ++ /* FIXME: this should be a normal IP pass. 
*/ ++ NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); + TERMINATE_PASS_LIST (all_late_ipa_passes) + +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +new file mode 100644 +index 000000000..43913104e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -0,0 +1,35 @@ ++# Copyright (C) 1997-2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++load_lib gcc-dg.exp ++load_lib torture-options.exp ++ ++# Initialize `dg'. ++dg-init ++torture-init ++ ++set STRUCT_REORG_TORTURE_OPTIONS list \ ++ { -O3 } \ ++ { -Ofast } ++ ++set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} ++ ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.c \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# All done. 
++torture-finish ++dg-finish +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +new file mode 100644 +index 000000000..6565fe8dd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -0,0 +1,24 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++struct a ++{ ++ int t, t1; ++}; ++ ++static struct a *b; ++ ++void *xmalloc(int); ++ ++ ++void f(void) ++{ ++ b = xmalloc (sizeof(*b)); ++} ++ ++int g(void) ++{ ++ return b->t; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +new file mode 100644 +index 000000000..44babd35b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +@@ -0,0 +1,29 @@ ++// { dg-do run } ++ ++#include <assert.h> ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int f(int i, int j) ++{ ++ struct a *t; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++int main() ++{ ++ assert (f(1, 2) == 3); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +new file mode 100644 +index 000000000..5864ad46f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -0,0 +1,23 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++#include <stdlib.h> ++typedef struct { ++ long laststart_offset; ++ unsigned regnum; ++} compile_stack_elt_t; ++typedef struct { ++ compile_stack_elt_t *stack; ++ unsigned size; ++} compile_stack_type; ++void f (const char *p, const char *pend, int c) ++{ ++ compile_stack_type compile_stack; ++ while (p 
!= pend) ++ if (c) ++ compile_stack.stack = realloc (compile_stack.stack, ++ (compile_stack.size << 1) ++ * sizeof (compile_stack_elt_t)); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +new file mode 100644 +index 000000000..e5a8a6c84 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +@@ -0,0 +1,59 @@ ++/* { dg-do run } */ ++ ++extern void abort (void); ++ ++struct S ++{ ++ int b; ++ int *c; ++}; ++static int d, e; ++ ++static struct S s; ++ ++static int * ++__attribute__((noinline, const)) ++foo (void) ++{ ++ return &s.b; ++} ++ ++int * ++__attribute__((noinline)) ++bar (int **f) ++{ ++ s.c = &d; ++ *f = &e; ++ /* As nothing ever takes the address of any int * field in struct S, ++ the write to *f can't alias with the s.c field. */ ++ return s.c; ++} ++ ++int ++__attribute__((noinline)) ++baz (int *x) ++{ ++ s.b = 1; ++ *x = 4; ++ /* Function foo takes address of an int field in struct S, ++ so *x can alias with the s.b field (and it does in this testcase). */ ++ return s.b; ++} ++ ++int ++__attribute__((noinline)) ++t (void) ++{ ++ int *f = (int *) 0; ++ return 10 * (bar (&f) != &d) + baz (foo ()); ++} ++ ++int ++main (void) ++{ ++ if (t () != 4) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +new file mode 100644 +index 000000000..733413a94 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +@@ -0,0 +1,29 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t AN; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ Ai.a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (Ai.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +new file mode 100644 +index 000000000..0ef686e74 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +@@ -0,0 +1,42 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ pi.b = i; ++ ++ for (i = 0; i < N; i++) ++ pi.a = pi.b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (pi.a != pi.b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +new file mode 100644 +index 000000000..23a53be53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +@@ -0,0 +1,37 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; 
++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t AN; ++ ++ for (i = 0; i < N; i++) ++ { ++ Ai.a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (Ai.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +new file mode 100644 +index 000000000..0cbb172f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +@@ -0,0 +1,40 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ pi.b = i; ++ ++ for (i = 0; i < N; i++) ++ pi.a = pi.b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (pi.a != pi.b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +new file mode 100644 +index 000000000..f900b1349 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +@@ -0,0 +1,31 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1<<N); ++ str.a = 2; ++ ++ for (i = 0; i < N; i++) ++ str.a = str.a * str.a; ++ ++ if (str.a != 
res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. */ ++ return (str.a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +new file mode 100644 +index 000000000..13b4cdc70 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +@@ -0,0 +1,64 @@ ++#include <stdlib.h> ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; ++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2i.c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? 
N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1i.a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1i.a != 1 || p2i.c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +new file mode 100644 +index 000000000..dcc545964 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +@@ -0,0 +1,43 @@ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int b; ++}str_t1; ++ ++typedef struct ++{ ++ float a; ++ float b; ++}str_t2; ++ ++#define N1 1000 ++#define N2 100 ++str_t1 A1N1; ++str_t2 A2N2; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N1; i++) ++ A1i.a = 0; ++ ++ for (i = 0; i < N2; i++) ++ A2i.a = 0; ++ ++ for (i = 0; i < N1; i++) ++ if (A1i.a != 0) ++ abort (); ++ ++ for (i = 0; i < N2; i++) ++ if (A2i.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* Arrays are not handled. 
*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +new file mode 100644 +index 000000000..6d6375fc1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct basic ++{ ++ int a; ++ int b10; ++} type_struct; ++ ++type_struct *str1; ++ ++int main() ++{ ++ int i; ++ ++ str1 = malloc (10 * sizeof (type_struct)); ++ ++ for (i=0; i<=9; i++) ++ str1i.a = str1i.b0; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +new file mode 100644 +index 000000000..9d3213408 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t AN; ++ str_t *p = A; ++ ++ for (i = 0; i < N; i++) ++ pi.a = 0; ++ ++ for (i = 0; i < N; i++) ++ if (pi.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +new file mode 100644 +index 000000000..d79992a53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +@@ -0,0 +1,29 @@ ++/* 
{ dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++ ++typedef struct test_struct ++{ ++ int a; ++ int b; ++} type_struct; ++ ++typedef type_struct **struct_pointer2; ++ ++struct_pointer2 str1; ++ ++int main() ++{ ++ int i, j; ++ ++ str1 = malloc (2 * sizeof (type_struct *)); ++ ++ for (i = 0; i <= 1; i++) ++ str1i = malloc (2 * sizeof (type_struct)); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +new file mode 100644 +index 000000000..ee9b0d765 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++ ++struct S { int a; struct V *b; }; ++typedef struct { int c; } T; ++typedef struct { int d; int e; } U; ++ ++void * ++fn (void *x) ++{ ++ return x; ++} ++ ++int ++foo (struct S *s) ++{ ++ T x; ++ ++ T y = *(T *)fn (&x); ++ return y.c; ++} ++ ++int ++bar (struct S *s) ++{ ++ U x; ++ ++ U y = *(U *)fn (&x); ++ return y.d + s->a; ++} ++ ++int ++main () ++{ ++ struct S s; ++ ++ foo(&s) + bar (&s); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +new file mode 100644 +index 000000000..9ebb2b4cc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++struct str ++{ ++ int a; ++ float b; ++}; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++foo 
(struct str * p_str) ++{ ++ static int sum = 0; ++ ++ sum = sum + p_str->a; ++ return sum; ++} ++ ++int ++main () ++{ ++ int i, sum; ++ struct str * p = malloc (N * sizeof (struct str)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ sum = foo (p+i); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c +new file mode 100644 +index 000000000..d0dce8b53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c +@@ -0,0 +1,33 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-ipa-sra" } */ ++ ++#include <stdlib.h> ++ ++struct A { ++ int d; ++ int d1; ++}; ++ ++struct A a; ++ ++struct A *foo () __attribute__((noinline)); ++struct A *foo () ++{ ++ a.d = 5; ++ return &a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped. 
.Type escapes via a return" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c +new file mode 100644 +index 000000000..71167182d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c +@@ -0,0 +1,32 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-ipa-sra" } */ ++ ++#include <stdlib.h> ++ ++struct A { ++ int d; ++}; ++ ++struct A a; ++ ++struct A foo () __attribute__((noinline)); ++struct A foo () ++{ ++ a.d = 5; ++ return a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a return" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +new file mode 100644 +index 000000000..74fa11f39 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 2 ++ ++str_t A2 = {{1,1},{2,2}}; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ Ai.b = Ai.a; ++ ++ for (i = 0; i < N; i++) ++ if (Ai.b != Ai.a) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +new file mode 100644 +index 000000000..60d2466e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++ ++typedef struct ++{ ++ str_t AN; ++ int c; ++}str_with_substr_t; ++ ++str_with_substr_t a; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ a.Ai.b = 0; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c +new file mode 100644 +index 000000000..baf617816 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++typedef struct ++{ ++ str_t * sub_str; ++ int c; ++}str_with_substr_t; ++ ++int foo; ++ ++int ++main (void) ++{ ++ int i; ++ str_with_substr_t AN; ++ str_t aN; ++ ++ for (i=0; i < N; i++) ++ Ai.sub_str = &(ai); ++ ++ for (i=0; i < N; i++) ++ Ai.sub_str->a = 5; ++ ++ foo = A56.sub_str->a; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 
b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c +new file mode 100644 +index 000000000..33fce3b23 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++ ++typedef struct ++{ ++ str_t sub_str; ++ int c; ++}str_with_substr_t; ++ ++int ++main () ++{ ++ int i; ++ str_with_substr_t AN; ++ ++ for (i = 0; i < N; i++) ++ Ai.sub_str.a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (Ai.sub_str.a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +new file mode 100644 +index 000000000..1c5a3aa15 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t AN; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ Ai.a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (Ai.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +new file mode 100644 +index 000000000..a0d1467fe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { 
dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ pi.b = i; ++ ++ for (i = 0; i < N; i++) ++ pi.b = pi.a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (pi.b != pi.a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +new file mode 100644 +index 000000000..6c24e1c8b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t AN; ++ ++ for (i = 0; i < N; i++) ++ { ++ Ai.a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (Ai.a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +new file mode 100644 +index 000000000..8f2f8143f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ 
++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ pi.b = i; ++ ++ for (i = 0; i < N; i++) ++ pi.b = pi.a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (pi.b != pi.a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +new file mode 100644 +index 000000000..98bf01a6d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ long i, num; ++ ++ num = rand(); ++ num = num > N ? 
N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 1; i <= num; i++) ++ pi-1.b = i; ++ ++ for (i = 1; i <= num; i++) ++ pi-1.a = pi-1.b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (pi.a != pi.b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +new file mode 100644 +index 000000000..66b0f967c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, num; ++ ++ num = rand(); ++ num = num > N ? 
N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ pi.b = i; ++ ++ for (i = 0; i < num; i++) ++ pi.a = pi.b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (pi.a != pi.b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +new file mode 100644 +index 000000000..d28bcfb02 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ float b; ++ int c; ++ float d; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 1600 ++#define N 100 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 100 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t *p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ pi.a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (pi.a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* Two more fields structure is not splitted. */ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +new file mode 100644 +index 000000000..37a6a43a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1<<N); ++ str.a = 2; ++ ++ for (i = 0; i < N; i++) ++ str.a = str.a * str.a; ++ ++ if (str.a != res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. */ ++ return (str.a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +new file mode 100644 +index 000000000..ca9a8efcf +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1<<N); ++ str_t str; ++ ++ str.a = 2; ++ ++ for (i = 0; i < N; i++) ++ str.a = str.a * str.a; ++ ++ if (str.a != res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. 
*/ ++ return (str.a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c +new file mode 100644 +index 000000000..baa95bddf +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++typedef struct ++{ ++ int a; ++ int *b; ++}str_t; ++ ++#define N 3 ++ ++str_t *p; ++ ++int ++main () ++{ ++ str_t str; ++ int i; ++ int res = 1 << (1 << N); ++ p = &str; ++ str.a = 2; ++ ++ p->b = &(p->a); ++ ++ for (i=0; i < N; i++) ++ p->a = *(p->b)*(*(p->b)); ++ ++ if (p->a != res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. */ ++ return (p->a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type escapes a cast to a different" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +new file mode 100644 +index 000000000..cba92e995 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include <stdlib.h> ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; ++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2i.c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? 
N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1i.a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1i.a != 1 || p2i.c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2dae5e1c7..366118126 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,6 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") + DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 606d1d60b..ec7be874c 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -526,6 +526,7 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt); +-- +2.33.0 +
View file
_service:tar_scm:0016-CompleteStructRelayout-Complete-Structure-Relayout.patch
Added
@@ -0,0 +1,2056 @@ +From 699caeaa2d89966e4af1d36bc96b53eb4dac0a09 Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Fri, 25 Aug 2023 09:59:39 +0800 +Subject: PATCH 16/22 CompleteStructRelayout Complete Structure Relayout + +Introduce complete structure reorganization based on original +structure reorganization optimization, which change array of +structure to structure of array in order to better utilize +spatial locality. +--- + gcc/ipa-struct-reorg/escapes.def | 2 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 994 ++++++++++++++++-- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 33 + + .../g++.dg/struct/no-body-function.cpp | 18 + + .../g++.dg/struct/struct-reorg-1.cpp | 13 + + .../g++.dg/struct/struct-reorg-2.cpp | 17 + + .../g++.dg/struct/struct-reorg-3.cpp | 24 + + gcc/testsuite/g++.dg/struct/struct-reorg.exp | 26 + + gcc/testsuite/gcc.dg/struct/csr_1.c | 60 ++ + .../gcc.dg/struct/csr_allocation-1.c | 46 + + .../gcc.dg/struct/csr_allocation-2.c | 59 ++ + .../gcc.dg/struct/csr_allocation-3.c | 77 ++ + gcc/testsuite/gcc.dg/struct/csr_cast_int.c | 52 + + .../gcc.dg/struct/csr_separate_instance.c | 48 + + .../gcc.dg/struct/sr_address_of_field.c | 37 + + gcc/testsuite/gcc.dg/struct/sr_convert_mem.c | 23 + + gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c | 25 + + gcc/testsuite/gcc.dg/struct/sr_pointer_and.c | 17 + + .../gcc.dg/struct/sr_pointer_minus.c | 33 + + 19 files changed, 1539 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/struct/no-body-function.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg.exp + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-2.c + create 
mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-3.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_cast_int.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_separate_instance.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_address_of_field.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_convert_mem.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_and.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c + +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +index c4c8e0739..d825eb3e6 100644 +--- a/gcc/ipa-struct-reorg/escapes.def ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -56,5 +56,7 @@ DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct + DEF_ESCAPE (escape_array, "Type is used in an array not handled yet") + DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet") + DEF_ESCAPE (escape_return, "Type escapes via a return not handled yet") ++DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") ++DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") + + #undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 238530860..c8b975a92 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -104,10 +104,12 @@ along with GCC; see the file COPYING3. If not see + #include "tree-ssa-live.h" /* For remove_unused_locals. 
*/ + #include "ipa-param-manipulation.h" + #include "gimplify-me.h" ++#include "cfgloop.h" + + namespace { + + using namespace struct_reorg; ++using namespace struct_relayout; + + #define VOID_POINTER_P(type) \ + (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) +@@ -194,6 +196,14 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, + GSI_SAME_STMT); + } + ++enum srmode ++{ ++ NORMAL = 0, ++ COMPLETE_STRUCT_RELAYOUT ++}; ++ ++static bool is_result_of_mult (tree, tree *, tree); ++ + } // anon namespace + + +@@ -283,7 +293,8 @@ srtype::srtype (tree type) + : type (type), + chain_type (false), + escapes (does_not_escape), +- visited (false) ++ visited (false), ++ has_alloc_array (0) + { + for (int i = 0; i < max_split; i++) + newtypei = NULL_TREE; +@@ -483,13 +494,6 @@ srtype::dump (FILE *f) + fn->simple_dump (f); + } + fprintf (f, "\n }\n"); +- fprintf (f, "\n field_sites = {"); +- FOR_EACH_VEC_ELT (field_sites, i, field) +- { +- fprintf (f, " \n"); +- field->simple_dump (f); +- } +- fprintf (f, "\n }\n"); + fprintf (f, "}\n"); + } + +@@ -631,15 +635,7 @@ srtype::create_new_type (void) + + maxclusters++; + +- const char *tname = NULL; +- +- if (TYPE_NAME (type) != NULL) +- { +- if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) +- tname = IDENTIFIER_POINTER (TYPE_NAME (type)); +- else if (DECL_NAME (TYPE_NAME (type)) != NULL) +- tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); +- } ++ const char *tname = get_type_name (type); + + for (unsigned i = 0; i < maxclusters; i++) + { +@@ -653,7 +649,10 @@ srtype::create_new_type (void) + if (tname) + { + name = concat (tname, ".reorg.", id, NULL); +- TYPE_NAME (newtypei) = get_identifier (name); ++ TYPE_NAME (newtypei) = build_decl (UNKNOWN_LOCATION, ++ TYPE_DECL, ++ get_identifier (name), ++ newtypei); + free (name); + } + } +@@ -673,6 +672,8 @@ srtype::create_new_type (void) + { + TYPE_FIELDS (newtypei) = newfieldsi; + layout_type (newtypei); ++ if (TYPE_NAME (newtypei) != 
NULL) ++ layout_decl (TYPE_NAME (newtypei), 0); + } + + warn_padded = save_warn_padded; +@@ -841,12 +842,6 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- if (type) +- { +- fprintf (f, "( srtype = "); +- type->simple_dump (f); +- fprintf (f, ")"); +- } + fprintf (f, "\n}\n"); + } + +@@ -855,7 +850,8 @@ srfield::dump (FILE *f) + void + srfield::simple_dump (FILE *f) + { +- fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++ if (fielddecl) ++ fprintf (f, "field (%d)", DECL_UID (fielddecl)); + } + + /* Dump out the access structure to FILE. */ +@@ -899,6 +895,92 @@ srdecl::dump (FILE *file) + } // namespace struct_reorg + + ++namespace struct_relayout { ++ ++/* Complete Structure Relayout Optimization. ++ It reorganizes all structure members, and puts same member together. ++ struct s { ++ long a; ++ int b; ++ struct s *c; ++ }; ++ Array looks like ++ abcabcabcabc... ++ will be transformed to ++ aaaa...bbbb...cccc... ++*/ ++ ++#define GPTR_SIZE(i) \ ++ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptri))) ++ ++unsigned transformed = 0; ++ ++unsigned ++csrtype::calculate_field_num (tree field_offset) ++{ ++ if (field_offset == NULL) ++ return 0; ++ ++ HOST_WIDE_INT off = int_byte_position (field_offset); ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (off == int_byte_position (field)) ++ return i; ++ i++; ++ } ++ return 0; ++} ++ ++void ++csrtype::init_type_info (void) ++{ ++ if (!type) ++ return; ++ new_size = old_size = tree_to_uhwi (TYPE_SIZE_UNIT (type)); ++ ++ /* Close enough to pad to improve performance. ++ 33~63 should pad to 64 but 33~48 (first half) are too far away, and ++ 65~127 should pad to 128 but 65~96 (first half) are too far away. 
*/ ++ if (old_size > 48 && old_size < 64) ++ new_size = 64; ++ if (old_size > 96 && old_size < 128) ++ new_size = 128; ++ ++ /* For performance reasons, only allow structure size ++ that is a power of 2 and not too big. */ ++ if (new_size != 1 && new_size != 2 ++ && new_size != 4 && new_size != 8 ++ && new_size != 16 && new_size != 32 ++ && new_size != 64 && new_size != 128) ++ { ++ new_size = 0; ++ field_count = 0; ++ return; ++ } ++ ++ unsigned i = 0; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ if (TREE_CODE (field) == FIELD_DECL) ++ i++; ++ field_count = i; ++ ++ struct_size = build_int_cstu (TREE_TYPE (TYPE_SIZE_UNIT (type)), ++ new_size); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Type: "); ++ print_generic_expr (dump_file, type); ++ fprintf (dump_file, " has %d members.\n", field_count); ++ fprintf (dump_file, "Modify struct size from %ld to %ld.\n", ++ old_size, new_size); ++ } ++} ++ ++} // namespace struct_relayout ++ ++ + namespace { + + struct ipa_struct_reorg +@@ -907,13 +989,10 @@ public: + // Constructors + ipa_struct_reorg (void) + : current_function (NULL), +- done_recording (false) ++ done_recording (false), ++ current_mode (NORMAL) + {} + +- // Public methods +- unsigned execute (void); +- void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); +-private: + // Fields + auto_vec_del<srtype> types; + auto_vec_del<srfunction> functions; +@@ -921,8 +1000,13 @@ private: + srfunction *current_function; + + bool done_recording; ++ srmode current_mode; ++ ++ // Methods ++ unsigned execute (enum srmode mode); ++ void mark_type_as_escape (tree type, escape_type escapes, ++ gimple *stmt = NULL); + +- // Private methods + void dump_types (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); +@@ -954,6 +1038,7 @@ private: + void maybe_record_allocation_site (cgraph_node *, gimple *); + void record_stmt_expr (tree expr, cgraph_node *node, gimple 
*stmt); + void mark_expr_escape (tree, escape_type, gimple *stmt); ++ bool handled_allocation_stmt (gimple *stmt); + tree allocate_size (srtype *t, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); +@@ -976,6 +1061,7 @@ private: + bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + ++ void check_alloc_num (gimple *stmt, srtype *type); + void check_definition (srdecl *decl, vec<srdecl *> &); + void check_uses (srdecl *decl, vec<srdecl *> &); + void check_use (srdecl *decl, gimple *stmt, vec<srdecl *> &); +@@ -990,8 +1076,591 @@ private: + + bool has_rewritten_type (srfunction *); + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++ ++ unsigned execute_struct_relayout (void); + }; + ++struct ipa_struct_relayout ++{ ++public: ++ // Fields ++ tree gptrmax_relayout_split + 1; ++ csrtype ctype; ++ ipa_struct_reorg *sr; ++ cgraph_node *current_node; ++ ++ // Constructors ++ ipa_struct_relayout (tree type, ipa_struct_reorg *sr_) ++ { ++ ctype.type = type; ++ sr = sr_; ++ current_node = NULL; ++ for (int i = 0; i < max_relayout_split + 1; i++) ++ gptri = NULL; ++ } ++ ++ // Methods ++ tree create_new_vars (tree type, const char *name); ++ void create_global_ptrs (void); ++ unsigned int rewrite (void); ++ void rewrite_stmt_in_function (void); ++ bool rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool handled_allocation_stmt (gcall *stmt); ++ void init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi); ++ bool check_call_uses (gcall *stmt); ++ bool rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi); ++ tree create_ssa (tree node, gimple_stmt_iterator *gsi); ++ bool is_candidate (tree xhs); ++ tree rewrite_address (tree xhs, gimple_stmt_iterator *gsi); ++ tree rewrite_offset (tree offset, HOST_WIDE_INT num); ++ bool rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi); ++ bool maybe_rewrite_cst (tree 
cst, gimple_stmt_iterator *gsi, ++ HOST_WIDE_INT ×); ++ unsigned int execute (void); ++}; ++ ++} // anon namespace ++ ++namespace { ++ ++/* Methods for ipa_struct_relayout. */ ++ ++static void ++set_var_attributes (tree var) ++{ ++ if (!var) ++ return; ++ gcc_assert (TREE_CODE (var) == VAR_DECL); ++ ++ DECL_ARTIFICIAL (var) = 1; ++ DECL_EXTERNAL (var) = 0; ++ TREE_STATIC (var) = 1; ++ TREE_PUBLIC (var) = 0; ++ TREE_USED (var) = 1; ++ DECL_CONTEXT (var) = NULL; ++ TREE_THIS_VOLATILE (var) = 0; ++ TREE_ADDRESSABLE (var) = 0; ++ TREE_READONLY (var) = 0; ++ if (is_global_var (var)) ++ set_decl_tls_model (var, TLS_MODEL_NONE); ++} ++ ++tree ++ipa_struct_relayout::create_new_vars (tree type, const char *name) ++{ ++ gcc_assert (type); ++ tree new_type = build_pointer_type (type); ++ ++ tree new_name = NULL; ++ if (name) ++ new_name = get_identifier (name); ++ ++ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); ++ ++ /* Set new_var's attributes. */ ++ set_var_attributes (new_var); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created new var: "); ++ print_generic_expr (dump_file, new_var); ++ fprintf (dump_file, "\n"); ++ } ++ return new_var; ++} ++ ++void ++ipa_struct_relayout::create_global_ptrs (void) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Create global gptrs: {\n"); ++ ++ char *gptr0_name = NULL; ++ const char *type_name = get_type_name (ctype.type); ++ ++ if (type_name) ++ gptr0_name = concat (type_name, "_gptr0", NULL); ++ tree var_gptr0 = create_new_vars (ctype.type, gptr0_name); ++ gptr0 = var_gptr0; ++ varpool_node::add (var_gptr0); ++ ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (ctype.type); field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree type = TREE_TYPE (field); ++ ++ char *name = NULL; ++ char id10 = {0}; ++ sprintf (id, "%d", i); ++ const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (field)); ++ ++ if 
(type_name && decl_name) ++ name = concat (type_name, "_", decl_name, "_gptr", id, NULL); ++ tree var = create_new_vars (type, name); ++ ++ gptri = var; ++ varpool_node::add (var); ++ i++; ++ } ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nTotally create %d gptrs. }\n\n", i); ++ gcc_assert (ctype.field_count == i - 1); ++} ++ ++void ++ipa_struct_relayout::rewrite_stmt_in_function (void) ++{ ++ gcc_assert (cfun); ++ ++ basic_block bb = NULL; ++ gimple_stmt_iterator si; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++} ++ ++unsigned int ++ipa_struct_relayout::rewrite (void) ++{ ++ cgraph_node *cnode = NULL; ++ function *fn = NULL; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p () || !cnode->has_gimple_body_p ()) ++ continue; ++ if (cnode->definition) ++ { ++ fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ if (fn == NULL) ++ continue; ++ ++ current_node = cnode; ++ push_cfun (fn); ++ ++ rewrite_stmt_in_function (); ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ compute_may_aliases (); ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ pop_cfun (); ++ current_node = NULL; ++ } ++ } ++ return TODO_verify_all; ++} ++ ++bool ++ipa_struct_relayout::rewrite_debug (gimple *stmt ATTRIBUTE_UNUSED, ++ gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) ++{ ++ /* Delete debug gimple now. 
*/ ++ return true; ++} ++ ++bool ++ipa_struct_relayout::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a <gassign *> (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a <gcall *> (stmt), gsi); ++ default: ++ break; ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::handled_allocation_stmt (gcall *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ return true; ++ return false; ++} ++ ++void ++ipa_struct_relayout::init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ gcc_assert (handled_allocation_stmt (stmt)); ++ ++ tree lhs = gimple_call_lhs (stmt); ++ ++ /* Case that gimple is at the end of bb. */ ++ if (gsi_one_before_end_p (*gsi)) ++ { ++ gassign *gptr0 = gimple_build_assign (gptr0, lhs); ++ gsi_insert_after (gsi, gptr0, GSI_SAME_STMT); ++ } ++ gsi_next (gsi); ++ ++ /* Emit gimple gptr0 = _X and gptr1 = _X. */ ++ gassign *gptr0 = gimple_build_assign (gptr0, lhs); ++ gsi_insert_before (gsi, gptr0, GSI_SAME_STMT); ++ gassign *gptr1 = gimple_build_assign (gptr1, lhs); ++ gsi_insert_before (gsi, gptr1, GSI_SAME_STMT); ++ ++ /* Emit gimple gptr_i = gptr_i-1 + _Ygap. */ ++ for (unsigned i = 2; i <= ctype.field_count; i++) ++ { ++ gimple *new_stmt = NULL; ++ tree gptr_i_prev_ssa = create_ssa (gptri-1, gsi); ++ tree gptr_i_ssa = make_ssa_name (TREE_TYPE (gptri-1)); ++ ++ /* Emit gimple _Ygap = N * sizeof (member). 
*/ ++ tree member_gap = gimplify_build2 (gsi, MULT_EXPR, ++ long_unsigned_type_node, ++ gimple_call_arg (stmt, 0), ++ GPTR_SIZE (i-1)); ++ ++ new_stmt = gimple_build_assign (gptr_i_ssa, POINTER_PLUS_EXPR, ++ gptr_i_prev_ssa, member_gap); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ gassign *gptr_i = gimple_build_assign (gptri, gptr_i_ssa); ++ gsi_insert_before (gsi, gptr_i, GSI_SAME_STMT); ++ } ++ gsi_prev (gsi); ++} ++ ++bool ++ipa_struct_relayout::check_call_uses (gcall *stmt) ++{ ++ gcc_assert (current_node); ++ srfunction *fn = sr->find_function (current_node); ++ tree lhs = gimple_call_lhs (stmt); ++ ++ if (fn == NULL) ++ return false; ++ ++ srdecl *d = fn->find_decl (lhs); ++ if (d == NULL) ++ return false; ++ if (types_compatible_p (d->type->type, ctype.type)) ++ return true; ++ ++ return false; ++} ++ ++bool ++ipa_struct_relayout::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (handled_allocation_stmt (stmt)) ++ { ++ /* Rewrite stmt _X = calloc (N, sizeof (struct)). */ ++ tree size = gimple_call_arg (stmt, 1); ++ if (TREE_CODE (size) != INTEGER_CST) ++ return false; ++ if (tree_to_uhwi (size) != ctype.old_size) ++ return false; ++ if (!check_call_uses (stmt)) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Rewrite allocation call:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ /* Modify sizeof (struct). 
*/ ++ gimple_call_set_arg (stmt, 1, ctype.struct_size); ++ update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ init_global_ptrs (stmt, gsi); ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::create_ssa (tree node, gimple_stmt_iterator *gsi) ++{ ++ gcc_assert (TREE_CODE (node) == VAR_DECL); ++ tree node_ssa = make_ssa_name (TREE_TYPE (node)); ++ gassign *stmt = gimple_build_assign (node_ssa, node); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return node_ssa; ++} ++ ++bool ++ipa_struct_relayout::is_candidate (tree xhs) ++{ ++ if (TREE_CODE (xhs) != COMPONENT_REF) ++ return false; ++ tree mem = TREE_OPERAND (xhs, 0); ++ if (TREE_CODE (mem) == MEM_REF) ++ { ++ tree type = TREE_TYPE (mem); ++ if (types_compatible_p (type, ctype.type)) ++ return true; ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::rewrite_address (tree xhs, gimple_stmt_iterator *gsi) ++{ ++ tree mem_ref = TREE_OPERAND (xhs, 0); ++ tree pointer = TREE_OPERAND (mem_ref, 0); ++ tree pointer_offset = TREE_OPERAND (mem_ref, 1); ++ tree field = TREE_OPERAND (xhs, 1); ++ ++ tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer); ++ tree gptr0_ssa = fold_convert (long_unsigned_type_node, gptr0); ++ ++ /* Emit gimple _X1 = ptr - gptr0. */ ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ pointer_ssa, gptr0_ssa); ++ ++ /* Emit gimple _X2 = _X1 / sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, ++ step1, ctype.struct_size); ++ ++ unsigned field_num = ctype.calculate_field_num (field); ++ gcc_assert (field_num > 0 && field_num <= ctype.field_count); ++ ++ /* Emit gimple _X3 = _X2 * sizeof (member). */ ++ tree step3 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ step2, GPTR_SIZE (field_num)); ++ ++ /* Emit gimple _X4 = gptrI. 
*/ ++ tree gptr_field_ssa = create_ssa (gptrfield_num, gsi); ++ tree new_address = make_ssa_name (TREE_TYPE (gptrfield_num)); ++ gassign *new_stmt = gimple_build_assign (new_address, POINTER_PLUS_EXPR, ++ gptr_field_ssa, step3); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ /* MEM_REF with nonzero offset like ++ MEMptr + sizeof (struct) = 0B ++ should be transformed to ++ MEMgptr + sizeof (member) = 0B ++ */ ++ HOST_WIDE_INT size ++ = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_address)))); ++ tree new_size = rewrite_offset (pointer_offset, size); ++ if (new_size) ++ TREE_OPERAND (mem_ref, 1) = new_size; ++ ++ /* Update mem_ref pointer. */ ++ TREE_OPERAND (mem_ref, 0) = new_address; ++ ++ /* Update mem_ref TREE_TYPE. */ ++ TREE_TYPE (mem_ref) = TREE_TYPE (TREE_TYPE (new_address)); ++ ++ return mem_ref; ++} ++ ++tree ++ipa_struct_relayout::rewrite_offset (tree offset, HOST_WIDE_INT num) ++{ ++ if (TREE_CODE (offset) == INTEGER_CST) ++ { ++ bool sign = false; ++ HOST_WIDE_INT off = TREE_INT_CST_LOW (offset); ++ if (off == 0) ++ return NULL; ++ if (off < 0) ++ { ++ off = -off; ++ sign = true; ++ } ++ if (off % ctype.old_size == 0) ++ { ++ HOST_WIDE_INT times = off / ctype.old_size; ++ times = sign ? 
-times : times; ++ return build_int_cst (TREE_TYPE (offset), num * times); ++ } ++ } ++ return NULL; ++} ++ ++#define REWRITE_ASSIGN_TREE_IN_STMT(node) \ ++do \ ++{ \ ++ tree node = gimple_assign_##node (stmt); \ ++ if (node && is_candidate (node)) \ ++ { \ ++ tree mem_ref = rewrite_address (node, gsi); \ ++ gimple_assign_set_##node (stmt, mem_ref); \ ++ update_stmt (stmt); \ ++ } \ ++} while (0) ++ ++/* COMPONENT_REF = exp => MEM_REF = exp ++ / \ / \ ++ MEM_REF field gptr offset ++ / \ ++ pointer offset ++*/ ++bool ++ipa_struct_relayout::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Maybe rewrite assign:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: REWRITE_ASSIGN_TREE_IN_STMT (rhs3); // FALLTHRU ++ case 3: ++ { ++ REWRITE_ASSIGN_TREE_IN_STMT (rhs2); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (rhs2 && TREE_CODE (rhs2) == INTEGER_CST) ++ { ++ /* Handle pointer++ and pointer-- or ++ factor is euqal to struct size. 
*/ ++ HOST_WIDE_INT times = 1; ++ if (maybe_rewrite_cst (rhs2, gsi, times)) ++ { ++ tree tmp = build_int_cst ( ++ TREE_TYPE (TYPE_SIZE_UNIT (ctype.type)), ++ ctype.new_size * times); ++ gimple_assign_set_rhs2 (stmt, tmp); ++ update_stmt (stmt); ++ } ++ } ++ } // FALLTHRU ++ case 2: REWRITE_ASSIGN_TREE_IN_STMT (rhs1); // FALLTHRU ++ case 1: REWRITE_ASSIGN_TREE_IN_STMT (lhs); // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, ++ HOST_WIDE_INT ×) ++{ ++ bool ret = false; ++ gcc_assert (TREE_CODE (cst) == INTEGER_CST); ++ ++ gimple *stmt = gsi_stmt (*gsi); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } ++ } ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MULT_EXPR) ++ { ++ if (gsi_one_before_end_p (*gsi)) ++ return false; ++ gsi_next (gsi); ++ gimple *stmt2 = gsi_stmt (*gsi); ++ ++ if (gimple_code (stmt2) == GIMPLE_ASSIGN ++ && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt2); ++ tree rhs1 = gimple_assign_rhs1 (stmt2); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ ret = true; ++ } ++ } ++ } ++ gsi_prev (gsi); ++ return ret; ++ } ++ return false; ++} ++ 
++unsigned int ++ipa_struct_relayout::execute (void) ++{ ++ ctype.init_type_info (); ++ if (ctype.field_count < min_relayout_split ++ || ctype.field_count > max_relayout_split) ++ return 0; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Complete Struct Relayout Type: "); ++ print_generic_expr (dump_file, ctype.type); ++ fprintf (dump_file, "\n"); ++ } ++ transformed++; ++ ++ create_global_ptrs (); ++ return rewrite (); ++} ++ ++} // anon namespace ++ ++ ++namespace { ++ ++/* Methods for ipa_struct_reorg. */ ++ + /* Dump all of the recorded types to file F. */ + + void +@@ -1189,7 +1858,7 @@ ipa_struct_reorg::record_type (tree type) + f->type = t1; + t1->add_field_site (f); + } +- if (t1 == type1) ++ if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) + type1->mark_escape (escape_rescusive_type, NULL); + } + } +@@ -1331,6 +2000,12 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + else + e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); + ++ /* Separate instance is hard to trace in complete struct ++ relayout optimization. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) ++ e = escape_separate_instance; ++ + if (e != does_not_escape) + type->mark_escape (e, NULL); + } +@@ -1369,6 +2044,7 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + || TREE_CODE (expr) == VIEW_CONVERT_EXPR) + { + tree r = TREE_OPERAND (expr, 0); ++ tree orig_type = TREE_TYPE (expr); + if (handled_component_p (r) + || TREE_CODE (r) == MEM_REF) + { +@@ -1382,8 +2058,18 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + escape_vce, stmt); + } + if (TREE_CODE (r) == MEM_REF) +- mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), +- escape_addr, stmt); ++ { ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), ++ escape_addr, stmt); ++ tree inner_type = TREE_TYPE (TREE_OPERAND (r, 0)); ++ if (orig_type != inner_type) ++ { ++ mark_type_as_escape (orig_type, ++ escape_cast_another_ptr, stmt); ++ mark_type_as_escape (inner_type, ++ escape_cast_another_ptr, stmt); ++ } ++ } + r = TREE_OPERAND (r, 0); + } + mark_expr_escape (r, escape_addr, stmt); +@@ -1407,7 +2093,8 @@ ipa_struct_reorg::find_vars (gimple *stmt) + { + case GIMPLE_ASSIGN: + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS +- || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (stmt) == NOP_EXPR) + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); +@@ -1432,6 +2119,32 @@ ipa_struct_reorg::find_vars (gimple *stmt) + current_function->record_decl (t, rhs, -1); + } + } ++ else ++ { ++ /* Because we won't handle these stmts in rewrite phase, ++ just mark these types as escaped. 
*/ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs3 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 3: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs2 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 2: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs1 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 1: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_lhs (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ } + break; + + case GIMPLE_CALL: +@@ -1514,9 +2227,21 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ + if (TREE_CODE (arg) == INTEGER_CST) + { +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) ++ { ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) + { +- *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ number = build_int_cst (TREE_TYPE (number), ++ -tree_to_shwi (number)); ++ *num = number; + return true; + } + return false; +@@ -1586,16 +2311,21 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + + /* Return TRUE if STMT is an allocation statement that is handled. 
*/ + +-static bool +-handled_allocation_stmt (gimple *stmt) ++bool ++ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; ++ ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ return true; + return false; + } + +@@ -1636,7 +2366,7 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + the size of structure. */ + if (operand_equal_p (arg1, struct_size, 0)) + return size; +- /* Check that first argument is a constant equal to ++ /* ??? Check that first argument is a constant equal to + the size of structure. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; +@@ -1751,6 +2481,25 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + } + } + ++static bool ++check_mem_ref_offset (tree expr) ++{ ++ tree num = NULL; ++ bool ret = false; ++ ++ if (TREE_CODE (expr) != MEM_REF) ++ return false; ++ ++ /* Try to find the structure size. 
*/ ++ tree field_off = TREE_OPERAND (expr, 1); ++ tree tmp = TREE_OPERAND (expr, 0); ++ if (TREE_CODE (tmp) == ADDR_EXPR) ++ tmp = TREE_OPERAND (tmp, 0); ++ tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, &num, size); ++ return ret; ++} ++ + static tree + get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + bool &realpart, bool &imagpart, +@@ -1792,7 +2541,8 @@ get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. */ + accesstype = TREE_TYPE (field_off); +- offset += tree_to_uhwi (field_off); ++ if (!check_mem_ref_offset (expr)) ++ offset += tree_to_uhwi (field_off); + return TREE_OPERAND (expr, 0); + } + default: +@@ -2176,6 +2926,31 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + type1->mark_escape (escape_cast_another_ptr, stmt); + } + ++void ++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) ++{ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && handled_allocation_stmt (stmt)) ++ { ++ tree arg0 = gimple_call_arg (stmt, 0); ++ basic_block bb = gimple_bb (stmt); ++ cgraph_node *node = current_function->node; ++ if (integer_onep (arg0)) ++ /* Actually NOT an array, but may ruin other array. */ ++ type->has_alloc_array = -1; ++ else if (bb->loop_father != NULL ++ && loop_outer (bb->loop_father) != NULL) ++ /* The allocation is in a loop. */ ++ type->has_alloc_array = -2; ++ else if (node->callers != NULL) ++ type->has_alloc_array = -3; ++ else ++ type->has_alloc_array = type->has_alloc_array < 0 ++ ? 
type->has_alloc_array ++ : type->has_alloc_array + 1; ++ } ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -2223,6 +2998,7 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist) + if (!handled_allocation_stmt (stmt) + || !allocate_size (type, stmt)) + type->mark_escape (escape_return, stmt); ++ check_alloc_num (stmt, type); + return; + } + /* If the SSA_NAME is sourced from an inline-asm, +@@ -2264,6 +3040,20 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist) + return; + } + ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, type, worklist, stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ check_type_and_push (rhs2, type, worklist, stmt); ++ return; ++ } ++ + /* Casts between pointers and integer are escaping. */ + if (gimple_assign_cast_p (stmt)) + { +@@ -2328,6 +3118,11 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + srtype *t1 = find_type (inner_type (t)); + if (t1 == type) + { ++ /* In Complete Struct Relayout, if lhs type is the same ++ as rhs type, we could return without any harm. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ return; ++ + tree base; + bool indirect; + srtype *type1; +@@ -2376,8 +3171,11 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs1 = gimple_cond_lhs (stmt); + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; +- if (gimple_cond_code (stmt) != EQ_EXPR +- && gimple_cond_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2406,8 +3204,11 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; +- if (gimple_assign_rhs_code (stmt) != EQ_EXPR +- && gimple_assign_rhs_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2692,6 +3493,12 @@ ipa_struct_reorg::record_accesses (void) + /* Record accesses inside a function. 
*/ + if (cnode->definition) + record_function (cnode); ++ else ++ { ++ tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl)); ++ mark_type_as_escape (return_type, escape_return, NULL); ++ } ++ + } + + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2807,8 +3614,11 @@ ipa_struct_reorg::propagate_escape (void) + void + ipa_struct_reorg::prune_escaped_types (void) + { +- detect_cycles (); +- propagate_escape (); ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ { ++ detect_cycles (); ++ propagate_escape (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -3954,17 +4764,66 @@ ipa_struct_reorg::rewrite_functions (void) + } + + unsigned int +-ipa_struct_reorg::execute (void) ++ipa_struct_reorg::execute_struct_relayout (void) + { +- /* FIXME: If there is a top-level inline-asm, +- the pass immediately returns. */ +- if (symtab->first_asm_symbol ()) +- return 0; +- record_accesses (); +- prune_escaped_types (); +- analyze_types (); ++ unsigned retval = 0; ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ tree type = typesi->type; ++ if (TYPE_FIELDS (type) == NULL) ++ continue; ++ if (typesi->has_alloc_array != 1) ++ continue; ++ if (typesi->chain_type) ++ continue; ++ retval |= ipa_struct_relayout (type, this).execute (); ++ } ++ ++ if (dump_file) ++ { ++ if (transformed) ++ fprintf (dump_file, "\nNumber of structures to transform in " ++ "Complete Structure Relayout is %d\n", transformed); ++ else ++ fprintf (dump_file, "\nNo structures to transform in " ++ "Complete Structure Relayout.\n"); ++ } ++ ++ return retval; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (enum srmode mode) ++{ ++ unsigned int ret = 0; ++ ++ if (mode == NORMAL) ++ { ++ current_mode = NORMAL; ++ /* FIXME: If there is a top-level inline-asm, ++ the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ ret = rewrite_functions (); ++ } ++ else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (dump_file) ++ fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); ++ current_mode = COMPLETE_STRUCT_RELAYOUT; ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ ++ ret = execute_struct_relayout (); ++ } + +- return rewrite_functions (); ++ return ret; + } + + const pass_data pass_data_ipa_struct_reorg = +@@ -3991,7 +4850,11 @@ public: + virtual bool gate (function *); + virtual unsigned int execute (function *) + { +- return ipa_struct_reorg ().execute (); ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (NORMAL); ++ if (!ret) ++ ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ return ret; + } + + }; // class pass_ipa_struct_reorg +@@ -3999,10 +4862,11 @@ public: + bool + pass_ipa_struct_reorg::gate (function *) + { +- return (optimize ++ return (optimize >= 3 + && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. */ +- && !seen_error ()); ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE); + } + + } // anon namespace +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index a58794070..ef7f4c780 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -120,6 +120,9 @@ private: + public: + tree newtypemax_split; + bool visited; ++ /* Negative number means it has illegal allocated arrays ++ that we do not optimize. 
*/ ++ int has_alloc_array; + + // Constructors + srtype (tree type); +@@ -232,4 +235,34 @@ struct srdecl + + } // namespace struct_reorg + ++ ++namespace struct_relayout { ++ ++const int min_relayout_split = 8; ++const int max_relayout_split = 16; ++ ++struct csrtype ++{ ++ tree type; ++ unsigned HOST_WIDE_INT old_size; ++ unsigned HOST_WIDE_INT new_size; ++ unsigned field_count; ++ tree struct_size; ++ ++ // Constructors ++ csrtype () ++ : type (NULL), ++ old_size (0), ++ new_size (0), ++ field_count (0), ++ struct_size (NULL) ++ {} ++ ++ // Methods ++ unsigned calculate_field_num (tree field_offset); ++ void init_type_info (void); ++}; ++ ++} // namespace struct_relayout ++ + #endif +diff --git a/gcc/testsuite/g++.dg/struct/no-body-function.cpp b/gcc/testsuite/g++.dg/struct/no-body-function.cpp +new file mode 100644 +index 000000000..4e56e73fc +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/no-body-function.cpp +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=gnu++17 -Wno-builtin-declaration-mismatch -O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -S" } */ ++ ++struct S { ++ int x; ++ double y; ++}; ++S f(); ++ ++const auto x0, y0 = f(); ++const auto x1, y1 = f(); ++ ++static union { ++int a; ++double b; ++}; ++ ++const auto x2, y2 = f(); +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp +new file mode 100644 +index 000000000..6ab71abe1 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details -S" } */ ++ ++struct Foo { int foo; int a; }; ++Foo& ignoreSetMutex = *(new Foo); ++ ++struct Goo { int goo; int a; }; ++ ++int main () ++{ ++ Goo* a; ++ return a->goo = 90; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp +new file mode 100644 +index 
000000000..72b7db8a9 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */ ++ ++#include <stdlib.h> ++ ++struct testg { ++ int b; ++ float c; ++}; ++ ++testg *testgvar; ++int main () ++{ ++ testgvar = (testg*) calloc(10, sizeof(testg)); ++ int b = testgvar->b; ++ return b; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp +new file mode 100644 +index 000000000..771164a96 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp +@@ -0,0 +1,24 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */ ++ ++#include <stdlib.h> ++ ++struct testg { ++ int b; ++ float c; ++ double d; ++ double e; ++ double f; ++ double h; ++ double i; ++ double j; ++ int k; ++}; ++ ++testg *testgvar; ++int main () ++{ ++ testgvar = (testg*) calloc(10, sizeof(testg)); ++ int b = testgvar->b; ++ return b; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg.exp b/gcc/testsuite/g++.dg/struct/struct-reorg.exp +new file mode 100644 +index 000000000..e3ffe1388 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg.exp +@@ -0,0 +1,26 @@ ++# Copyright (C) 2021-2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++load_lib g++-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++g++-dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.cpp \ ++ "" "" ++ ++# All done. ++dg-finish +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/csr_1.c b/gcc/testsuite/gcc.dg/struct/csr_1.c +new file mode 100644 +index 000000000..811030bf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_1.c +@@ -0,0 +1,60 @@ ++// { dg-do run } ++ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.a != 100) ++ { ++ abort (); ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.l = ni.a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.l != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in Complete Structure Relayout is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c +new file mode 100644 +index 000000000..63bb695ae +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c +@@ -0,0 +1,46 @@ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; 
++}; ++ ++const int MAX = 1; ++node_p n; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c +new file mode 100644 +index 000000000..0f75d5d12 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c +@@ -0,0 +1,59 @@ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++int main() ++{ ++ int i; ++ for (i = 0; i < MAX / 5; i++) ++ { ++ n = (node_p) calloc(MAX, sizeof(node_t)); ++ if (i == 0) ++ { ++ m = n; ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ mi.a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c +new file mode 100644 +index 000000000..3dcb674c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c +@@ -0,0 +1,77 @@ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++void test (int, int) __attribute__((noinline)); ++ ++void ++test (int num, int flag) ++{ ++ if (num <= 0) ++ { ++ return; ++ } ++ n = (node_p) calloc (num, sizeof (node_t)); ++ if (flag) ++ { ++ m = n; ++ } ++ return; ++} ++ ++int ++main () ++{ ++ test (MAX, 1); ++ test (MAX, 0); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ mi.a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.a != 100) ++ { ++ abort (); ++ } ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (mi.a != 50) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c +new file mode 100644 +index 000000000..6907158c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c +@@ -0,0 +1,52 @@ ++// { dg-do run } ++ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 100; ++node_p n; ++unsigned long y; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.b = 50; ++ } ++ ++ node_p x = &n5; ++ y = (unsigned long) x; ++ y += 8; ++ ++ if (*((unsigned long*) y) != 50) ++ { ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes a cast from/to intergral type\"" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c +new file mode 100644 +index 000000000..9e5e05838 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c +@@ -0,0 +1,48 @@ ++#include <stdlib.h> ++#include <stdio.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++node_t t; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ t.a = 100; ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ ni.a = t.a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (ni.a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a 
separate instance\"" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c +new file mode 100644 +index 000000000..9d58edab8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++ ++static struct S { ++ int *p1; ++ int *p2; ++} s; ++ ++typedef __UINTPTR_TYPE__ uintptr_t; ++ ++int ++foo () ++{ ++ int i = 1; ++ int j = 2; ++ struct S s; ++ int **p; ++ s.p1 = &i; ++ s.p2 = &j; ++ p = &s.p1; ++ uintptr_t pi = (uintptr_t) p; ++ pi = pi + sizeof (int *); ++ p = (int **)pi; ++ **p = 3; ++ return j; ++} ++ ++int ++main () ++{ ++ if (foo () != 3) ++ { ++ __builtin_abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct S has escaped: \"Type escapes via taking the address of field\"" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c +new file mode 100644 +index 000000000..a99ee0de4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++ ++struct T1 { ++ long var1; ++ int var2; ++}; ++ ++struct T2 { ++ long var1; ++ int var2; ++}; ++ ++void test (void*); ++ ++__attribute__((used)) void ++foo (struct T2 *t2) ++{ ++ struct T1* t1 = (void *)(&t21); ++ void* data = (void *)(&t11); ++ ++ test(data); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +new file mode 100644 +index 000000000..fb135ef0b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +@@ -0,0 +1,25 @@ ++// { dg-do compile } ++ ++#include <stdlib.h> ++ ++struct S { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++struct S* s; ++struct S* t = (struct S*) 1000; ++ ++int ++main () ++{ ++ s = (struct S*) calloc (1000, sizeof (struct S)); ++ s = s > t ? 
s : t; ++ if (s == 0) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c +new file mode 100644 +index 000000000..9a4b10d9a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++struct test {long val; struct test* next; }; ++ ++unsigned long P_DATA; ++ ++void func (struct test*); ++ ++__attribute__((used)) static void ++foo (struct test* pt) ++{ ++ struct test t; ++ ++ t.next = (void *)((unsigned long)pt->next & P_DATA); ++ func(&t); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c +new file mode 100644 +index 000000000..9a82da0d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c +@@ -0,0 +1,33 @@ ++// { dg-do compile } ++ ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++int max; ++int x; ++ ++node_p n; ++node_p z; ++ ++int ++main () ++{ ++ n = (node_p) calloc (max, sizeof (node_t)); ++ ++ node_p xp = &nx; ++ ++ if (xp - z == 10) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */ +-- +2.33.0 +
View file
_service:tar_scm:0017-StructReorg-Some-bugfix-for-structure-reorganization.patch
Added
@@ -0,0 +1,489 @@ +From 2b4db34d3b21ff8597373e9e67858b3b60cc7dae Mon Sep 17 00:00:00 2001 +From: eastb233 <xiezhiheng@huawei.com> +Date: Fri, 21 Jul 2023 11:20:51 +0800 +Subject: PATCH 17/22 StructReorg Some bugfix for structure reorganization + +Some bugfix for structure reorganization, +1. disable type simplify in LTO within optimizations +2. only enable optimizations in C language +3. use new to initialize allocated memory in symbol-summary.h +4. cover escape scenarios not considered +--- + gcc/ipa-free-lang-data.cc | 11 ++ + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 101 +++++++++++-------- + gcc/symbol-summary.h | 13 ++- + gcc/testsuite/gcc.dg/struct/struct_reorg-5.c | 31 ++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-6.c | 54 ++++++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-7.c | 38 +++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-8.c | 25 +++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-9.c | 54 ++++++++++ + 8 files changed, 283 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-5.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-6.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-7.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-8.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-9.c + +diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc +index a74215685..5450be9fe 100644 +--- a/gcc/ipa-free-lang-data.cc ++++ b/gcc/ipa-free-lang-data.cc +@@ -102,6 +102,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld) + static tree + fld_simplified_type_name (tree type) + { ++ /* Simplify type will cause that struct A and struct A within ++ struct B are different type pointers, so skip it in structure ++ optimizations. 
*/ ++ if (flag_ipa_struct_reorg) ++ return TYPE_NAME (type); ++ + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) + return TYPE_NAME (type); + /* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the +@@ -340,6 +346,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + { + if (!t) + return t; ++ /* Simplify type will cause that struct A and struct A within ++ struct B are different type pointers, so skip it in structure ++ optimizations. */ ++ if (flag_ipa_struct_reorg) ++ return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); + /* FIXME: This triggers verification error, see PR88140. */ +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index c8b975a92..9f790b28b 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -105,6 +105,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-param-manipulation.h" + #include "gimplify-me.h" + #include "cfgloop.h" ++#include "langhooks.h" + + namespace { + +@@ -196,6 +197,39 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, + GSI_SAME_STMT); + } + ++/* Check whether in C language or LTO with only C language. 
*/ ++ ++static bool ++lang_c_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ ++ if (!language_string) ++ return false; ++ ++ if (strcmp (language_string, "GNU GIMPLE") == 0) ++ { ++ unsigned i = 0; ++ tree t = NULL; ++ const char *unit_string = NULL; ++ ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ unit_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (!unit_string ++ || (strncmp (unit_string, "GNU C", 5) != 0) ++ || (!ISDIGIT (unit_string5))) ++ return false; ++ } ++ return true; ++ } ++ else if (strncmp (language_string, "GNU C", 5) == 0 ++ && ISDIGIT (language_string5)) ++ return true; ++ ++ return false; ++} ++ + enum srmode + { + NORMAL = 0, +@@ -1018,7 +1052,6 @@ public: + void analyze_types (void); + void clear_visited (void); + bool create_new_types (void); +- void restore_field_type (void); + void create_new_decls (void); + srdecl *find_decl (tree); + void create_new_functions (void); +@@ -2107,7 +2140,12 @@ ipa_struct_reorg::find_vars (gimple *stmt) + srtype *t = find_type (inner_type (TREE_TYPE (rhs))); + srdecl *d = find_decl (lhs); + if (!d && t) +- current_function->record_decl (t, lhs, -1); ++ { ++ current_function->record_decl (t, lhs, -1); ++ tree var = SSA_NAME_VAR (lhs); ++ if (var && VOID_POINTER_P (TREE_TYPE (var))) ++ current_function->record_decl (t, var, -1); ++ } + } + if (TREE_CODE (rhs) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (rhs)) +@@ -2116,7 +2154,12 @@ ipa_struct_reorg::find_vars (gimple *stmt) + srtype *t = find_type (inner_type (TREE_TYPE (lhs))); + srdecl *d = find_decl (rhs); + if (!d && t) +- current_function->record_decl (t, rhs, -1); ++ { ++ current_function->record_decl (t, rhs, -1); ++ tree var = SSA_NAME_VAR (rhs); ++ if (var && VOID_POINTER_P (TREE_TYPE (var))) ++ current_function->record_decl (t, var, -1); ++ } + } + } + else +@@ -2796,8 +2839,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + if (escapes != does_not_escape) + { + for (unsigned i = 0; i < 
gimple_call_num_args (stmt); i++) +- mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), +- escapes); ++ { ++ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), ++ escapes); ++ srdecl *d = current_function->find_decl ( ++ gimple_call_arg (stmt, i)); ++ if (d) ++ d->type->mark_escape (escapes, stmt); ++ } + return; + } + +@@ -3731,42 +3780,6 @@ ipa_struct_reorg::analyze_types (void) + } + } + +-/* When struct A has a struct B member, B's type info +- is not stored in +- TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) +- Try to restore B's type information. */ +- +-void +-ipa_struct_reorg::restore_field_type (void) +-{ +- for (unsigned i = 0; i < types.length (); i++) +- { +- for (unsigned j = 0; j < typesi->fields.length (); j++) +- { +- srfield *field = typesi->fieldsj; +- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) +- { +- /* If field type has TYPE_FIELDS information, +- we do not need to do this. */ +- if (TYPE_FIELDS (field->type->type) != NULL) +- continue; +- for (unsigned k = 0; k < types.length (); k++) +- { +- if (i == k) +- continue; +- const char *type1 = get_type_name (field->type->type); +- const char *type2 = get_type_name (typesk->type); +- if (type1 == NULL || type2 == NULL) +- continue; +- if (type1 == type2 +- && TYPE_FIELDS (typesk->type)) +- field->type = typesk; +- } +- } +- } +- } +-} +- + /* Create all new types we want to create. */ + + bool +@@ -4647,7 +4660,6 @@ ipa_struct_reorg::rewrite_functions (void) + { + unsigned retval = 0; + +- restore_field_type (); + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +@@ -4866,7 +4878,10 @@ pass_ipa_struct_reorg::gate (function *) + && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. 
*/ + && !seen_error () +- && flag_lto_partition == LTO_PARTITION_ONE); ++ && flag_lto_partition == LTO_PARTITION_ONE ++ /* Only enable struct optimizations in C since other ++ languages' grammar forbid. */ ++ && lang_c_p ()); + } + + } // anon namespace +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index c54d3084c..3fe64047c 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -103,6 +103,12 @@ protected: + /* Allocates new data that are stored within map. */ + T* allocate_new () + { ++ /* In structure optimizatons, we call new to ensure that ++ the allocated memory is initialized to 0. */ ++ if (flag_ipa_struct_reorg) ++ return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () ++ : new T (); ++ + /* Call gcc_internal_because we do not want to call finalizer for + a type T. We call dtor explicitly. */ + return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () +@@ -115,7 +121,12 @@ protected: + if (is_ggc ()) + ggc_delete (item); + else +- m_allocator.remove (item); ++ { ++ if (flag_ipa_struct_reorg) ++ delete item; ++ else ++ m_allocator.remove (item); ++ } + } + + /* Unregister all call-graph hooks. 
*/ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c +new file mode 100644 +index 000000000..273baa9a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */ ++ ++struct D ++{ ++ int n; ++ int c 8; ++}; ++ ++struct A ++{ ++ int i; ++ char *p; ++}; ++ ++struct B ++{ ++ struct A *a; ++ struct D *d; ++}; ++ ++int dtInsert1 (struct B *b) ++{ ++ struct A a = { 0, 0 }; ++ struct D *d; ++ b->a = &a; ++ d = b->d; ++ &d->c d->n; ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c +new file mode 100644 +index 000000000..455f9b501 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */ ++ ++typedef struct basic_block_def *basic_block; ++typedef struct gimple_seq_node_d *gimple_seq_node; ++typedef struct gimple_seq_d *gimple_seq; ++typedef struct ++{ ++ gimple_seq_node ptr; ++ gimple_seq seq; ++ basic_block bb; ++} gimple_stmt_iterator; ++typedef void *gimple; ++extern void exit(int); ++struct gimple_seq_node_d ++{ ++ gimple stmt; ++ struct gimple_seq_node_d *next; ++}; ++struct gimple_seq_d ++{ ++}; ++static __inline__ gimple_stmt_iterator ++gsi_start (gimple_seq seq) ++{ ++ gimple_stmt_iterator i; ++ i.seq = seq; ++ return i; ++} ++static __inline__ unsigned char ++gsi_end_p (gimple_stmt_iterator i) ++{ ++ return i.ptr == ((void *)0); ++} ++static __inline__ void ++gsi_next (gimple_stmt_iterator *i) ++{ ++ i->ptr = i->ptr->next; ++} ++static __inline__ gimple ++gsi_stmt (gimple_stmt_iterator i) ++{ ++ return i.ptr->stmt; ++} ++void ++c_warn_unused_result (gimple_seq seq) ++{ ++ gimple_stmt_iterator i; ++ for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i)) ++ { ++ gimple g = gsi_stmt (i); ++ if (!g) 
exit(0); ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c +new file mode 100644 +index 000000000..afc0bd86c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++struct gki_elem { ++ char *key; ++ int idx; ++}; ++ ++typedef struct { ++ struct gki_elem *table; ++ ++ int primelevel; ++ int nhash; ++ int nkeys; ++} GKI; ++ ++void * ++sre_malloc(size_t size) ++{ ++ void *ptr = malloc (size); ++ return ptr; ++} ++ ++__attribute__((noinline)) int ++GKIStoreKey(GKI *hash) ++{ ++ hash->table = sre_malloc(sizeof(struct gki_elem)); ++} ++ ++int ++main () ++{ ++ GKI *hash = malloc (sizeof(GKI)); ++ GKIStoreKey(hash); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c +new file mode 100644 +index 000000000..9bcfaf368 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++ ++typedef struct { ++ unsigned char blue; ++ unsigned char green; ++} Pixel; ++ ++typedef struct { ++ unsigned short colormaplength; ++ Pixel *colormapdata; ++} TargaImage; ++ ++TargaImage *img; ++ ++int main() { ++ img = (TargaImage *) malloc( sizeof(TargaImage) ); ++ if (img->colormaplength > 0) { ++ img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength); ++ memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) ); ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c +new file mode 100644 +index 000000000..052f4e3bd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c +@@ -0,0 +1,54 @@ ++/* { dg-do run } */ ++ ++extern void abort(void); ++ ++struct packed_ushort { ++ unsigned short ucs; ++} __attribute__((packed)); ++ ++struct source { ++ int 
pos, length; ++}; ++ ++static int flag; ++ ++static void __attribute__((noinline)) fetch(struct source *p) ++{ ++ p->length = 128; ++} ++ ++static struct packed_ushort __attribute__((noinline)) next(struct source *p) ++{ ++ struct packed_ushort rv; ++ ++ if (p->pos >= p->length) { ++ if (flag) { ++ flag = 0; ++ fetch(p); ++ return next(p); ++ } ++ flag = 1; ++ rv.ucs = 0xffff; ++ return rv; ++ } ++ rv.ucs = 0; ++ return rv; ++} ++ ++int main(void) ++{ ++ struct source s; ++ int i; ++ ++ s.pos = 0; ++ s.length = 0; ++ flag = 0; ++ ++ for (i = 0; i < 16; i++) { ++ struct packed_ushort rv = next(&s); ++ if ((i == 0 && rv.ucs != 0xffff) ++ || (i > 0 && rv.ucs != 0)) ++ abort(); ++ } ++ return 0; ++} +-- +2.33.0 +
View file
_service:tar_scm:0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
Added
@@ -0,0 +1,342 @@ +From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001 +From: dingguangya <dingguangya1@huawei.com> +Date: Sat, 29 Jul 2023 18:27:10 +0800 +Subject: PATCH 18/22 ccmp Add another optimization opportunity for ccmp + instruction + +Add flag -fccmp2. +Enables the use of the ccmp instruction by creating a new conflict +relationship for instances where temporary expressions replacement +cannot be effectively created. +--- + gcc/ccmp.cc | 33 ++++ + gcc/ccmp.h | 1 + + gcc/common.opt | 4 + + gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++ + gcc/tree-ssa-coalesce.cc | 197 ++++++++++++++++++++++ + 5 files changed, 250 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c + +diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc +index 3db0a264e..e34f3bcc6 100644 +--- a/gcc/ccmp.cc ++++ b/gcc/ccmp.cc +@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see + #include "cfgexpand.h" + #include "ccmp.h" + #include "predict.h" ++#include "gimple-iterator.h" + + /* Check whether T is a simple boolean variable or a SSA name + set by a comparison operator in the same basic block. */ +@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g) + return false; + } + ++/* Check whether bb is a potential conditional compare candidate. 
*/ ++bool ++check_ccmp_candidate (basic_block bb) ++{ ++ gimple_stmt_iterator gsi; ++ gimple *bb_last_stmt, *stmt; ++ tree op0, op1; ++ ++ gsi = gsi_last_bb (bb); ++ bb_last_stmt = gsi_stmt (gsi); ++ ++ if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND) ++ { ++ op0 = gimple_cond_lhs (bb_last_stmt); ++ op1 = gimple_cond_rhs (bb_last_stmt); ++ ++ if (TREE_CODE (op0) == SSA_NAME ++ && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE ++ && TREE_CODE (op1) == INTEGER_CST ++ && ((gimple_cond_code (bb_last_stmt) == NE_EXPR) ++ || (gimple_cond_code (bb_last_stmt) == EQ_EXPR))) ++ { ++ stmt = SSA_NAME_DEF_STMT (op0); ++ if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) ++ { ++ return ccmp_candidate_p (stmt); ++ } ++ } ++ } ++ return false; ++} ++ + /* Extract the comparison we want to do from the tree. */ + void + get_compare_parts (tree t, int *up, rtx_code *rcode, +diff --git a/gcc/ccmp.h b/gcc/ccmp.h +index 1799d5fed..efe3a1c14 100644 +--- a/gcc/ccmp.h ++++ b/gcc/ccmp.h +@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see + #define GCC_CCMP_H + + extern rtx expand_ccmp_expr (gimple *, machine_mode); ++extern bool check_ccmp_candidate (basic_block bb); + + #endif /* GCC_CCMP_H */ +diff --git a/gcc/common.opt b/gcc/common.opt +index 4d91ce8cf..0aa516719 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2017,6 +2017,10 @@ fira-verbose= + Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5) + -fira-verbose=<number> Control IRA's level of diagnostic messages. + ++fccmp2 ++Common Var(flag_ccmp2) Init(0) Optimization ++Optimize potential ccmp instruction in complex scenarios. ++ + fivopts + Common Var(flag_ivopts) Init(1) Optimization + Optimize induction variables on trees. 
+diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c +new file mode 100644 +index 000000000..b509ba810 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */ ++ ++int func (int a, int b, int c) ++{ ++ while(1) ++ { ++ if(a-- == 0 || b >= c) ++ { ++ return 1; ++ } ++ } ++} ++ ++/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */ +diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc +index dccf41ab8..195e06428 100644 +--- a/gcc/tree-ssa-coalesce.cc ++++ b/gcc/tree-ssa-coalesce.cc +@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see + #include "explow.h" + #include "tree-dfa.h" + #include "stor-layout.h" ++#include "ccmp.h" ++#include "target.h" ++#include "tree-outof-ssa.h" + + /* This set of routines implements a coalesce_list. This is an object which + is used to track pairs of ssa_names which are desirable to coalesce +@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr) + bitmap_clear (&ptr->live_base_var); + } + ++/* Return true if gimple is a copy assignment. */ ++ ++static inline bool ++gimple_is_assign_copy_p (gimple *gs) ++{ ++ return (is_gimple_assign (gs) && gimple_assign_copy_p (gs) ++ && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME ++ && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME); ++} ++ ++#define MAX_CCMP_CONFLICT_NUM 5 ++ ++/* Clear high-cost conflict graphs. */ ++ ++static void ++remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph) ++{ ++ unsigned x = 0; ++ int add_conflict_num = 0; ++ bitmap b; ++ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b) ++ { ++ if (b) ++ { ++ add_conflict_num++; ++ } ++ } ++ if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM) ++ { ++ conflict_graph->conflicts.release (); ++ } ++} ++ ++/* Adding a new conflict graph to the original graph. 
*/ ++ ++static void ++process_add_graph (live_track *live, basic_block bb, ++ ssa_conflicts *conflict_graph) ++{ ++ tree use, def; ++ ssa_op_iter iter; ++ gimple *first_visit_stmt = NULL; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ if (gimple_visited_p (gsi_stmt (gsi))) ++ { ++ first_visit_stmt = gsi_stmt (gsi); ++ break; ++ } ++ } ++ if (!first_visit_stmt) ++ return; ++ ++ for (gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (stmt)) ++ { ++ live_track_clear_var (live, gimple_assign_rhs1 (stmt)); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF) ++ { ++ live_track_process_def (live, def, conflict_graph); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) ++ { ++ live_track_process_use (live, use); ++ } ++ } ++} ++ ++/* Build a conflict graph based on ccmp candidate. 
*/ ++ ++static void ++add_ccmp_conflict_graph (ssa_conflicts *conflict_graph, ++ tree_live_info_p liveinfo, var_map map, basic_block bb) ++{ ++ live_track *live; ++ tree use, def; ++ ssa_op_iter iter; ++ live = new_live_track (map); ++ live_track_init (live, live_on_exit (liveinfo, bb)); ++ ++ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb)); ++ gcc_assert (gimple_cond_lhs (last_stmt)); ++ ++ auto_vec<tree> stack; ++ stack.safe_push (gimple_cond_lhs (last_stmt)); ++ while (!stack.is_empty ()) ++ { ++ tree op = stack.pop (); ++ gimple *op_stmt = SSA_NAME_DEF_STMT (op); ++ if (!op_stmt || gimple_bb (op_stmt) != bb ++ || !is_gimple_assign (op_stmt) ++ || !ssa_is_replaceable_p (op_stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (op_stmt)) ++ { ++ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt)); ++ } ++ gimple_set_visited (op_stmt, true); ++ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF) ++ { ++ live_track_process_def (live, def, conflict_graph); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE) ++ { ++ stack.safe_push (use); ++ live_track_process_use (live, use); ++ } ++ } ++ ++ process_add_graph (live, bb, conflict_graph); ++ delete_live_track (live); ++ remove_high_cost_graph_for_ccmp (conflict_graph); ++} ++ ++/* Determine whether the ccmp conflict graph can be added. ++ i.e, ++ ++ ;; basic block 3, loop depth 1 ++ ;; pred: 2 ++ ;; 3 ++ # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)> ++ _7 = b_4 (D) >= c_5 (D); ++ _8 = ivtmp.5_10 == 0; ++ _9 = _7 | _8; ++ ivtmp.5_11 = ivtmp.5_10 - 1; ++ if (_9 != 0) ++ goto <bb 4>; 10.70% ++ else ++ goto <bb 3>; 89.30% ++ ++ In the above loop, the expression will be replaced: ++ ++ _7 replaced by b_4 (D) >= c_5 (D) ++ _8 replaced by ivtmp.5_10 == 0 ++ ++ If the current case want use the ccmp instruction, then ++ ++ _9 can replaced by _7 | _8 ++ ++ So this requires that ivtmp.5_11 and ivtmp.5_10 be divided into different ++ partitions. 
++ ++ Now this function can achieve this ability. */ ++ ++static void ++determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo, ++ var_map map, ssa_conflicts *graph) ++{ ++ if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb)) ++ return; ++ for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); ++ gsi_next (&bsi)) ++ { ++ gimple_set_visited (gsi_stmt (bsi), false); ++ } ++ ssa_conflicts *ccmp_conflict_graph; ++ ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map)); ++ add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb); ++ unsigned x; ++ bitmap b; ++ if (ccmp_conflict_graph) ++ { ++ FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b) ++ { ++ if (!b) ++ continue; ++ unsigned y = bitmap_first_set_bit (b); ++ if (!graph->conflictsx || !bitmap_bit_p (graph->conflictsx, y)) ++ { ++ ssa_conflicts_add (graph, x, y); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "potential ccmp: add additional " ++ "conflict-ssa : bb%d %d:%d\n", ++ bb->index, x, y); ++ } ++ } ++ } ++ } ++ ssa_conflicts_delete (ccmp_conflict_graph); ++} + + /* Build a conflict graph based on LIVEINFO. Any partitions which are in the + partition view of the var_map liveinfo is based on get entries in the +@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo) + live_track_process_use (live, var); + } + ++ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph); ++ + /* If result of a PHI is unused, looping over the statements will not + record any conflicts since the def was never live. Since the PHI node + is going to be translated out of SSA form, it will insert a copy. +-- +2.33.0 +
View file
_service:tar_scm:0019-fp-model-Enable-fp-model-on-kunpeng.patch
Added
@@ -0,0 +1,405 @@ +From 8cdb316a3fe205a3089b9c17aec0442f4d5f75be Mon Sep 17 00:00:00 2001 +From: bule <bule1@huawei.com> +Date: Sun, 27 Aug 2023 16:49:04 +0800 +Subject: PATCH 19/22 fp-model Enable fp-model on kunpeng + +Enable fp-model options on kunpeng for precision control. +--- + gcc/common.opt | 26 +++++ + gcc/config/aarch64/aarch64-linux.h | 3 +- + gcc/flag-types.h | 9 ++ + gcc/fortran/options.cc | 8 ++ + gcc/opts-common.cc | 146 ++++++++++++++++++++++++++++- + gcc/opts.cc | 68 ++++++++++++++ + 6 files changed, 256 insertions(+), 4 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc52..f5eef8a45 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1642,6 +1642,32 @@ ffp-int-builtin-inexact + Common Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=normal|fast|precise|except|strict Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. + ; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. 
+diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +index 5e4553d79..a5cba6391 100644 +--- a/gcc/config/aarch64/aarch64-linux.h ++++ b/gcc/config/aarch64/aarch64-linux.h +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 2c8498169..64c64eb32 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -260,6 +260,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff --git a/gcc/fortran/options.cc b/gcc/fortran/options.cc +index d0fa634f1..3eb99a84a 100644 +--- a/gcc/fortran/options.cc ++++ b/gcc/fortran/options.cc +@@ -243,6 +243,7 @@ form_from_filename (const char *filename) + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -286,6 +287,13 @@ gfc_post_options (const char **pfilename) + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays. 
*/ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc +index 7c07d5046..489a6e02a 100644 +--- a/gcc/opts-common.cc ++++ b/gcc/opts-common.cc +@@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see + #include "spellcheck.h" + #include "opts-jobserver.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -1091,7 +1092,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -1112,11 +1113,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx) + return false; + } + ++/* Check whether opt_idx exists in decoded_options array between index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (const struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_optionsk.opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_optionsi = decoded_optionsi + 1; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_optionsfp_model_index; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_optionsOfast_index); ++ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. 
*/ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "%<-ffast-math%> before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_optionsfno_fast_math_index.value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, ++ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1127,7 +1226,12 @@ prune_options (struct cl_decoded_option **decoded_options, + const struct cl_option *option; + unsigned int fdiagnostics_color_idx = 0; + ++ if (!diagnostic_ready_p ()) ++ diagnostic_initialize (global_dc, 0); ++ + /* Remove arguments which are negated by others after them. 
*/ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1151,6 +1255,34 @@ prune_options (struct cl_decoded_option **decoded_options, + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. */ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_optionsi.value ++ != old_decoded_optionsnext_fp_model_idx.value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_optionsi. ++ orig_option_with_args_text, ++ old_decoded_optionsnext_fp_model_idx. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. 
*/ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_optionsopt_idx; +@@ -1190,6 +1322,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { +diff --git a/gcc/opts.cc b/gcc/opts.cc +index a97630d1c..b522ed7e2 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -328,6 +328,7 @@ static void set_debug_level (uint32_t dinfo, int extended, + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2857,6 +2858,10 @@ common_handle_option (struct gcc_options *opts, + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -3266,6 +3271,69 @@ set_fast_math_flags (struct gcc_options *opts, int set) + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. */ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. 
*/ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. */ + static void +-- +2.33.0 +
View file
_service:tar_scm:0020-simdmath-Enable-simdmath-on-kunpeng.patch
Added
@@ -0,0 +1,317 @@ +From 49ad10199dbdda2c36850a2617f5c985977939c5 Mon Sep 17 00:00:00 2001 +From: bule <bule1@huawei.com> +Date: Sun, 27 Aug 2023 16:49:42 +0800 +Subject: PATCH 20/22 simdmath Enable simdmath on kunpeng + +This enable simd math function supported by libmathlib on fortran/c/c++. +Use -fsimdmath to turn on the generation of simdmath function. The +supported functions can be found in simdmath.h. Add more simd declaration +if you need more kinds of math functions. -msimdmath-64 is used to turn +on 64-bit simd math functions which is not supported by libmathlib. +Therefore, this option is default to off. +--- + gcc/c-family/c-opts.cc | 4 ++ + gcc/common.opt | 4 ++ + gcc/config/aarch64/aarch64.cc | 9 ++++- + gcc/config/aarch64/aarch64.opt | 6 +++ + gcc/fortran/scanner.cc | 3 ++ + gcc/opts.cc | 17 ++++++++ + .../gcc.target/aarch64/simd_pcs_attribute-3.c | 2 +- + libgomp/Makefile.am | 4 +- + libgomp/Makefile.in | 10 +++-- + libgomp/configure | 4 +- + libgomp/configure.ac | 2 +- + libgomp/simdmath.h.in | 40 +++++++++++++++++++ + libgomp/simdmath_f.h.in | 11 +++++ + 13 files changed, 106 insertions(+), 10 deletions(-) + create mode 100644 libgomp/simdmath.h.in + create mode 100644 libgomp/simdmath_f.h.in + +diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc +index a341a0617..5134f6128 100644 +--- a/gcc/c-family/c-opts.cc ++++ b/gcc/c-family/c-opts.cc +@@ -801,6 +801,10 @@ c_common_post_options (const char **pfilename) + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff --git a/gcc/common.opt b/gcc/common.opt +index f5eef8a45..e9d580957 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2125,6 +2125,10 @@ fmath-errno + Common Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. 
+ ++fsimdmath ++Common Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. ++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors=<number> Maximum number of errors to report. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 226dc9dff..a3da4ca30 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26904,8 +26904,13 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (known_eq (clonei->simdlen, 0U)) + { +- count = 2; +- vec_bits = (num == 0 ? 64 : 128); ++ /* Currently mathlib or sleef hasn't provide function for V2SF mode ++ simdclone of single precision functions. (e.g._ZCVnN2v_expf) ++ Therefore this mode is disabled by default to avoid link error. ++ Use -msimdmath-64 option to enable this mode. */ ++ count = flag_simdmath_64 ? 2 : 1; ++ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); ++ + clonei->simdlen = exact_div (vec_bits, elt_bits); + } + else +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 92220b26e..a64b927e9 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for + single precision and to 32 bits for double precision. + If enabled, it implies -mlow-precision-recip-sqrt. + ++msimdmath-64 ++Target Var(flag_simdmath_64) Optimization ++Allow compiler to generate V2SF 64 bits simdclone of math functions, ++which is not currently supported in mathlib or sleef. ++Therefore this option is disabled by default. ++ + mlow-precision-div + Target Var(flag_mlow_precision_div) Optimization + Enable the division approximation. 
Enabling this reduces +diff --git a/gcc/fortran/scanner.cc b/gcc/fortran/scanner.cc +index 2dff25147..63e262f51 100644 +--- a/gcc/fortran/scanner.cc ++++ b/gcc/fortran/scanner.cc +@@ -2769,6 +2769,9 @@ gfc_new_file (void) + if (flag_pre_include != NULL) + load_file (flag_pre_include, NULL, false); + ++ if (flag_simdmath) ++ load_file ("simdmath_f.h", NULL, false); ++ + if (gfc_cpp_enabled ()) + { + gfc_cpp_preprocess (gfc_source_file); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index b522ed7e2..c3cc2c169 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -322,6 +322,7 @@ static const char undocumented_msg = N_("This option lacks documentation."); + static const char use_diagnosed_msg = N_("Uses of this option are diagnosed."); + + typedef char *char_p; /* For DEF_VEC_P. */ ++static void set_simdmath_flags (struct gcc_options *opts, int set); + + static void set_debug_level (uint32_t dinfo, int extended, + const char *arg, struct gcc_options *opts, +@@ -2850,6 +2851,10 @@ common_handle_option (struct gcc_options *opts, + dc->min_margin_width = value; + break; + ++ case OPT_fsimdmath: ++ set_simdmath_flags (opts, value); ++ break; ++ + case OPT_fdump_: + /* Deferred. */ + break; +@@ -3227,6 +3232,18 @@ common_handle_option (struct gcc_options *opts, + return true; + } + ++/* The following routines are used to set -fno-math-errno and -fopenmp-simd ++ to enable vector mathlib. */ ++static void ++set_simdmath_flags (struct gcc_options *opts, int set) ++{ ++ if (set) ++ { ++ opts->x_flag_errno_math = 0; ++ opts->x_flag_openmp_simd = 1; ++ } ++} ++ + /* Used to set the level of strict aliasing warnings in OPTS, + when no level is specified (i.e., when -Wstrict-aliasing, and not + -Wstrict-aliasing=level was given). 
+diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +index 95f6a6803..e0e0efa9d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c ++++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-Ofast" } */ ++/* { dg-options "-Ofast -msimdmath-64" } */ + + __attribute__ ((__simd__)) + __attribute__ ((__nothrow__ , __leaf__ , __const__)) +diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am +index f8b2a06d6..8dfa160d6 100644 +--- a/libgomp/Makefile.am ++++ b/libgomp/Makefile.am +@@ -75,10 +75,10 @@ libgomp_la_SOURCES += openacc.f90 + endif + + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + if USE_FORTRAN + nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + endif + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) +diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in +index 6f0cb7161..90fc326f0 100644 +--- a/libgomp/Makefile.in ++++ b/libgomp/Makefile.in +@@ -147,7 +147,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno + mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs + CONFIG_HEADER = config.h +-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ ++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ + libgomp.spec + CONFIG_CLEAN_VPATH_FILES = + am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +@@ -583,9 +583,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS) + 
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) + LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ +@@ -676,6 +676,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in +diff --git a/libgomp/configure b/libgomp/configure +index 85fdb4d3f..471c957b7 100755 +--- a/libgomp/configure ++++ b/libgomp/configure +@@ -17064,7 +17064,7 @@ fi + + + +-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" ++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" + + ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" + +@@ -18215,6 +18215,8 @@ do + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; + "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; ++ "simdmath.h") 
CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; ++ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; + "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; + "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; +diff --git a/libgomp/configure.ac b/libgomp/configure.ac +index a9b1f3973..1f81a0d30 100644 +--- a/libgomp/configure.ac ++++ b/libgomp/configure.ac +@@ -472,7 +472,7 @@ CFLAGS="$save_CFLAGS" + # Determine what GCC version number to use in filesystem paths. + GCC_BASE_VER + +-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) ++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) + AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) + AC_CONFIG_FILES(testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in) + AC_CONFIG_FILES(testsuite/libgomp-site-extra.exp) +diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in +new file mode 100644 +index 000000000..ab91a4ec3 +--- /dev/null ++++ b/libgomp/simdmath.h.in +@@ -0,0 +1,40 @@ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double cos (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float cosf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double sin (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float sinf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double exp (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float expf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double log (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float logf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double pow (double x, double y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float powf (float x, float y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float exp2f (float x); ++ 
++#ifdef __cplusplus ++} // extern "C" ++#endif +diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in +new file mode 100644 +index 000000000..550595015 +--- /dev/null ++++ b/libgomp/simdmath_f.h.in +@@ -0,0 +1,11 @@ ++!GCC$ builtin (cos) attributes simd (notinbranch) ++!GCC$ builtin (cosf) attributes simd (notinbranch) ++!GCC$ builtin (sin) attributes simd (notinbranch) ++!GCC$ builtin (sinf) attributes simd (notinbranch) ++!GCC$ builtin (exp) attributes simd (notinbranch) ++!GCC$ builtin (expf) attributes simd (notinbranch) ++!GCC$ builtin (exp2f) attributes simd (notinbranch) ++!GCC$ builtin (log) attributes simd (notinbranch) ++!GCC$ builtin (logf) attributes simd (notinbranch) ++!GCC$ builtin (pow) attributes simd (notinbranch) ++!GCC$ builtin (powf) attributes simd (notinbranch) +-- +2.33.0 +
View file
_service:tar_scm:0021-StructReorderFields-Structure-reorder-fields.patch
Added
@@ -0,0 +1,5739 @@ +From 6997c9ad8985f6f0bfc16cdb46e7386af299a226 Mon Sep 17 00:00:00 2001 +From: h00564365 <huangxiaoquan1@huawei.com> +Date: Mon, 31 Jul 2023 22:01:56 +0800 +Subject: PATCH 21/22 StructReorderFields Structure reorder fields + +Introduce structure fields reordering optimization, that change +fields ordering of C-like structures in order to better utilize spatial +locality. +--- + gcc/common.opt | 4 + + gcc/doc/invoke.texi | 1 + + gcc/gimple-ssa-warn-access.cc | 2 +- + gcc/ipa-free-lang-data.cc | 4 +- + gcc/ipa-struct-reorg/escapes.def | 3 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 2545 +++++++++++++---- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 14 +- + gcc/passes.def | 1 + + gcc/symbol-summary.h | 4 +- + .../struct/rf_DTE_struct_instance_field.c | 75 + + gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c | 94 + + .../gcc.dg/struct/rf_check_ptr_layers_bug.c | 24 + + .../gcc.dg/struct/rf_create_fields_bug.c | 82 + + .../gcc.dg/struct/rf_create_new_func_bug.c | 56 + + .../gcc.dg/struct/rf_ele_minus_verify.c | 60 + + .../gcc.dg/struct/rf_escape_by_base.c | 83 + + .../gcc.dg/struct/rf_external_func_types.c | 69 + + gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c | 72 + + .../gcc.dg/struct/rf_mem_ref_offset.c | 58 + + .../struct/rf_mul_layer_ptr_record_bug.c | 30 + + .../gcc.dg/struct/rf_pass_conflict.c | 109 + + gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c | 87 + + gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c | 71 + + .../gcc.dg/struct/rf_ptr_negate_expr.c | 55 + + gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c | 34 + + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c | 55 + + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c | 58 + + .../gcc.dg/struct/rf_rescusive_type.c | 57 + + .../struct/rf_rewrite_assign_more_cmp.c | 65 + + .../gcc.dg/struct/rf_rewrite_cond_bug.c | 72 + + .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c | 58 + + .../gcc.dg/struct/rf_rewrite_phi_bug.c | 81 + + gcc/testsuite/gcc.dg/struct/rf_shwi.c | 23 + + gcc/testsuite/gcc.dg/struct/rf_visible_func.c | 92 
+ + .../gcc.dg/struct/rf_void_ptr_param_func.c | 54 + + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 15 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 8 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 9 +- + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + 40 files changed, 3796 insertions(+), 490 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_external_func_types.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c + create mode 100644 
gcc/testsuite/gcc.dg/struct/rf_shwi.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_visible_func.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 0c7bd2f6c..98169de7c 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1954,6 +1954,10 @@ fipa-matrix-reorg + Common Ignore + Does nothing. Preserved for backward compatibility. + ++fipa-reorder-fields ++Common Var(flag_ipa_reorder_fields) Init(0) Optimization ++Perform structure fields reorder optimizations. ++ + fipa-struct-reorg + Common Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 3485cc8af..2b376e0e9 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}. + -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-reorder-fields @gol + -fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol +diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc +index a24645783..7f5c92c96 100644 +--- a/gcc/gimple-ssa-warn-access.cc ++++ b/gcc/gimple-ssa-warn-access.cc +@@ -2198,7 +2198,7 @@ pass_waccess::gate (function *) + In pass waccess, it will traverse all SSA and cause ICE + when handling these unused SSA. So temporarily disable + pass waccess when enable structure optimizations. 
*/ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return false; + + return (warn_free_nonheap_object +diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc +index 5450be9fe..a88381ddb 100644 +--- a/gcc/ipa-free-lang-data.cc ++++ b/gcc/ipa-free-lang-data.cc +@@ -105,7 +105,7 @@ fld_simplified_type_name (tree type) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return TYPE_NAME (type); + + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) +@@ -349,7 +349,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +index d825eb3e6..996a09bac 100644 +--- a/gcc/ipa-struct-reorg/escapes.def ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -58,5 +58,8 @@ DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer not handled + DEF_ESCAPE (escape_return, "Type escapes via a return not handled yet") + DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") + DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") ++DEF_ESCAPE (escape_via_orig_escape, "Type escapes via a original escape type") ++DEF_ESCAPE (escape_instance_field, "Type escapes via a field of instance") ++DEF_ESCAPE (escape_via_empty_no_orig, "Type escapes via empty and no original") + + #undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 
9f790b28b..3e5f9538b 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -207,50 +207,88 @@ lang_c_p (void) + if (!language_string) + return false; + +- if (strcmp (language_string, "GNU GIMPLE") == 0) ++ if (lang_GNU_C ()) ++ return true; ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // For LTO check + { + unsigned i = 0; +- tree t = NULL; +- const char *unit_string = NULL; ++ tree t = NULL_TREE; + + FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) + { +- unit_string = TRANSLATION_UNIT_LANGUAGE (t); +- if (!unit_string +- || (strncmp (unit_string, "GNU C", 5) != 0) +- || (!ISDIGIT (unit_string5))) ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (language_string, "GNU C", 5) ++ || (language_string5 != '\0' ++ && !(ISDIGIT (language_string5)))) + return false; + } + return true; + } +- else if (strncmp (language_string, "GNU C", 5) == 0 +- && ISDIGIT (language_string5)) +- return true; +- + return false; + } + ++/* Get the number of pointer layers. */ ++ ++int ++get_ptr_layers (tree expr) ++{ ++ int layers = 0; ++ while (POINTER_TYPE_P (expr) || TREE_CODE (expr) == ARRAY_TYPE) ++ { ++ layers++; ++ expr = TREE_TYPE (expr); ++ } ++ return layers; ++} ++ ++/* Comparison pointer layers. */ ++ ++bool ++cmp_ptr_layers (tree a, tree b) ++{ ++ return get_ptr_layers (a) == get_ptr_layers (b); ++} ++ ++/* Return true if the ssa_name comes from the void* parameter. 
*/ ++ ++bool ++is_from_void_ptr_parm (tree ssa_name) ++{ ++ gcc_assert (TREE_CODE (ssa_name) == SSA_NAME); ++ tree var = SSA_NAME_VAR (ssa_name); ++ return (var && TREE_CODE (var) == PARM_DECL ++ && VOID_POINTER_P (TREE_TYPE (ssa_name))); ++} ++ + enum srmode + { + NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT ++ COMPLETE_STRUCT_RELAYOUT, ++ STRUCT_REORDER_FIELDS + }; + +-static bool is_result_of_mult (tree, tree *, tree); ++static bool is_result_of_mult (tree arg, tree *num, tree struct_size); ++static bool isptrptr (tree type); + +-} // anon namespace ++srmode current_mode; + ++} // anon namespace + + namespace struct_reorg { + ++hash_map <tree, auto_vec <tree>> fields_to_finish; ++ + /* Constructor of srfunction. */ + + srfunction::srfunction (cgraph_node *n) + : node (n), + old (NULL), + newnode (NULL), +- newf (NULL) +-{} ++ newf (NULL), ++ is_safe_func (false) ++{ ++} + + /* Add an ARG to the list of arguments for the function. */ + +@@ -400,12 +438,13 @@ srtype::add_field_site (srfield *field) + + /* Constructor of DECL. */ + +-srdecl::srdecl (srtype *tp, tree decl, int argnum) ++srdecl::srdecl (srtype *tp, tree decl, int argnum, tree orig_type) + : type (tp), + decl (decl), + func (NULL_TREE), + argumentnum (argnum), +- visited (false) ++ visited (false), ++ orig_type (orig_type) + { + if (TREE_CODE (decl) == SSA_NAME) + func = current_function_decl; +@@ -429,17 +468,23 @@ srfunction::find_decl (tree decl) + /* Record DECL of the TYPE with argument num ARG. */ + + srdecl * +-srfunction::record_decl (srtype *type, tree decl, int arg) ++srfunction::record_decl (srtype *type, tree decl, int arg, tree orig_type) + { + // Search for the decl to see if it is already there. + srdecl *decl1 = find_decl (decl); + + if (decl1) +- return decl1; ++ { ++ /* Added the orig_type information. 
*/ ++ if (!decl1->orig_type && orig_type && isptrptr (orig_type)) ++ decl1->orig_type = orig_type; ++ return decl1; ++ } + + gcc_assert (type); + +- decl1 = new srdecl (type, decl, arg); ++ orig_type = isptrptr (TREE_TYPE (decl)) ? TREE_TYPE (decl) : orig_type; ++ decl1 = new srdecl (type, decl, arg, isptrptr (orig_type) ? orig_type : NULL); + decls.safe_push (decl1); + return decl1; + } +@@ -503,31 +548,21 @@ srtype::dump (FILE *f) + print_generic_expr (f, type); + fprintf (f, "(%d) { ", TYPE_UID (type)); + if (escapes != does_not_escape) +- fprintf (f, " escapes = \"%s\"\n", escape_reason ()); +- fprintf (f, " fields = { "); ++ fprintf (f, "escapes = \"%s\"", escape_reason ()); ++ fprintf (f, "\nfields = {\n"); + FOR_EACH_VEC_ELT (fields, i, field) +- { +- if (i == 0) +- fprintf (f, "\n "); +- else +- fprintf (f, "\n, "); +- field->dump (f); +- } +- fprintf (f, " }\n "); +- fprintf (f, "\n accesses = {"); ++ field->dump (f); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\naccesses = {\n"); + FOR_EACH_VEC_ELT (accesses, i, access) +- { +- fprintf (f, "\n"); +- access->dump (f); +- } +- fprintf (f, " }\n "); +- fprintf (f, "\n functions = {"); ++ access->dump (f); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\nfunctions = {\n"); + FOR_EACH_VEC_ELT (functions, i, fn) +- { +- fprintf (f, " \n"); +- fn->simple_dump (f); +- } +- fprintf (f, "\n }\n"); ++ fn->simple_dump (f); ++ fprintf (f, "}\n"); + fprintf (f, "}\n"); + } + +@@ -537,6 +572,8 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ fprintf (f, "(%d)", TYPE_UID (type)); + } + + /* Analyze the type and decide what to be done with it. 
*/ +@@ -572,6 +609,12 @@ srfield::create_new_fields (tree newtypemax_split, + tree newfieldsmax_split, + tree newlastmax_split) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ create_new_reorder_fields (newtype, newfields, newlast); ++ return; ++ } ++ + tree ntmax_split; + + for (unsigned i = 0; i < max_split; i++) +@@ -620,6 +663,104 @@ srfield::create_new_fields (tree newtypemax_split, + } + } + ++/* Reorder fields. */ ++ ++void ++srfield::reorder_fields (tree newfieldsmax_split, tree newlastmax_split, ++ tree &field) ++{ ++ /* Reorder fields in descending. ++ newfields: always stores the first member of the chain ++ and with the largest size. ++ field: indicates the node to be inserted. */ ++ if (newfieldsclusternum == NULL) ++ { ++ newfieldsclusternum = field; ++ newlastclusternum = field; ++ } ++ else ++ { ++ tree tmp = newfieldsclusternum; ++ if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tmp)))) ++ { ++ DECL_CHAIN (field) = tmp; ++ newfieldsclusternum = field; ++ } ++ else ++ { ++ while (DECL_CHAIN (tmp) ++ && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ <= tree_to_uhwi ( ++ TYPE_SIZE (TREE_TYPE (DECL_CHAIN (tmp)))))) ++ tmp = DECL_CHAIN (tmp); ++ ++ /* Now tmp size > field size ++ insert field: tmp -> xx ==> tmp -> field -> xx. */ ++ DECL_CHAIN (field) = DECL_CHAIN (tmp); // field -> xx ++ DECL_CHAIN (tmp) = field; // tmp -> field ++ } ++ } ++} ++ ++/* Create the new reorder fields for this field. ++ newtypemax_split: srtype's member variable, ++ newfieldsmax_split: created by create_new_type func, ++ newlastmax_split: created by create_new_type func. */ ++ ++void ++srfield::create_new_reorder_fields (tree newtypemax_split, ++ tree newfieldsmax_split, ++ tree newlastmax_split) ++{ ++ /* newtype, corresponding to newtypemax_split in srtype. */ ++ tree nt = NULL_TREE; ++ if (type == NULL) ++ /* Common var. */ ++ nt = fieldtype; ++ else ++ { ++ /* RECORD_TYPE var. 
*/ ++ if (type->has_escaped ()) ++ nt = type->type; ++ else ++ nt = type->newtype0; ++ } ++ tree field = make_node (FIELD_DECL); ++ ++ /* Used for recursive types. ++ fields_to_finish: hase_map in the format of "type: {fieldA, fieldB}", ++ key : indicates the original type, ++ vaule: filed that need to be updated to newtype. */ ++ if (nt == NULL) ++ { ++ nt = make_node (RECORD_TYPE); ++ auto_vec <tree> &fields ++ = fields_to_finish.get_or_insert (inner_type (type->type)); ++ fields.safe_push (field); ++ } ++ ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ if (type == NULL) ++ /* Common members do not need to reconstruct. ++ Otherwise, int* -> int** or void* -> void**. */ ++ TREE_TYPE (field) = nt; ++ else ++ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtypeclusternum; ++ ++ reorder_fields (newfields, newlast, field); ++ ++ /* srfield member variable, which stores the new field decl. */ ++ newfield0 = field; ++} ++ + /* Create the new TYPE corresponding to THIS type. */ + + bool +@@ -655,7 +796,8 @@ srtype::create_new_type (void) + /* If the fields' types did have a change or + we are not splitting the struct into two clusters, + then just return false and don't change the type. */ +- if (!createnewtype && maxclusters == 0) ++ if (!createnewtype && maxclusters == 0 ++ && current_mode != STRUCT_REORDER_FIELDS) + { + newtype0 = type; + return false; +@@ -664,6 +806,7 @@ srtype::create_new_type (void) + /* Should have at most max_split clusters. */ + gcc_assert (maxclusters < max_split); + ++ /* Record the first member of the field chain. 
*/ + tree newfieldsmax_split; + tree newlastmax_split; + +@@ -682,7 +825,8 @@ srtype::create_new_type (void) + sprintf (id, "%d", i); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." : ".reorg.", id, NULL); + TYPE_NAME (newtypei) = build_decl (UNKNOWN_LOCATION, + TYPE_DECL, + get_identifier (name), +@@ -718,6 +862,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < maxclusters; i++) + { + print_generic_expr (dump_file, newtypei); ++ fprintf (dump_file, "(%d)", TYPE_UID (newtypei)); + fprintf (dump_file, "\n"); + } + } +@@ -776,8 +921,12 @@ srfunction::create_new_decls (void) + tree newinnermax_split; + memset (newinner, 0, sizeof (newinner)); + for (unsigned j = 0; j < max_split && type->newtypej; j++) +- newtype1j = reconstruct_complex_type (TREE_TYPE (declsi->decl), +- type->newtypej); ++ { ++ newtype1j = reconstruct_complex_type ( ++ isptrptr (declsi->orig_type) ? declsi->orig_type ++ : TREE_TYPE (declsi->decl), ++ type->newtypej); ++ } + if (inner) + { + srdecl *in = find_decl (inner); +@@ -825,7 +974,8 @@ srfunction::create_new_decls (void) + sprintf (id, "%d", j); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." 
: ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -850,7 +1000,6 @@ srfunction::create_new_decls (void) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created New decls for decl:\n"); +- fprintf (dump_file, "\n"); + declsi->dump (dump_file); + fprintf (dump_file, "\n"); + for (unsigned j = 0; j < max_split && declsi->newdeclj; j++) +@@ -876,7 +1025,7 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- fprintf (f, "\n}\n"); ++ fprintf (f, "}\n"); + } + + /* A simplified dump out the field structure to FILE. */ +@@ -908,7 +1057,7 @@ sraccess::dump (FILE *f) + fprintf (f, " in function: %s/%d", node->name (), node->order); + fprintf (f, ", stmt:\n"); + print_gimple_stmt (f, stmt, 0); +- fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); + } + + /* Dump out the decl structure to FILE. */ +@@ -1023,8 +1172,7 @@ public: + // Constructors + ipa_struct_reorg (void) + : current_function (NULL), +- done_recording (false), +- current_mode (NORMAL) ++ done_recording (false) + {} + + // Fields +@@ -1032,9 +1180,10 @@ public: + auto_vec_del<srfunction> functions; + srglobal globals; + srfunction *current_function; ++ hash_set <cgraph_node *> safe_functions; ++ auto_vec<srtype *> ext_func_types; + + bool done_recording; +- srmode current_mode; + + // Methods + unsigned execute (enum srmode mode); +@@ -1042,6 +1191,7 @@ public: + gimple *stmt = NULL); + + void dump_types (FILE *f); ++ void dump_newtypes (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); + void record_accesses (void); +@@ -1049,6 +1199,9 @@ public: + bool walk_field_for_cycles (srtype *); + void prune_escaped_types (void); + void propagate_escape (void); ++ void propagate_escape_via_original (void); ++ void propagate_escape_via_empty_with_no_original (void); ++ void propagate_escape_via_ext_func_types (void); + void 
analyze_types (void); + void clear_visited (void); + bool create_new_types (void); +@@ -1060,8 +1213,11 @@ public: + srdecl *record_var (tree decl, + escape_type escapes = does_not_escape, + int arg = -1); ++ void record_safe_func_with_void_ptr_parm (void); + srfunction *record_function (cgraph_node *node); + srfunction *find_function (cgraph_node *node); ++ void record_field_type (tree field, srtype *base_srtype); ++ void record_struct_field_types (tree base_type, srtype *base_srtype); + srtype *record_type (tree type); + void process_union (tree type); + srtype *find_type (tree type); +@@ -1072,7 +1228,7 @@ public: + void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); + void mark_expr_escape (tree, escape_type, gimple *stmt); + bool handled_allocation_stmt (gimple *stmt); +- tree allocate_size (srtype *t, gimple *stmt); ++ tree allocate_size (srtype *t, srdecl *decl, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); + +@@ -1087,21 +1243,23 @@ public: + bool ignore_missing_decl = false); + bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhsmax_split, + tree newrhsmax_split); +- bool get_type_field (tree expr, tree &base, bool &indirect, +- srtype *&type, srfield *&field, +- bool &realpart, bool &imagpart, +- bool &address, bool should_create = false, +- bool can_escape = false); ++ bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, ++ srfield *&field, bool &realpart, bool &imagpart, ++ bool &address, bool &escape_from_base, ++ bool should_create = false, bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + + void check_alloc_num (gimple *stmt, srtype *type); ++ void check_definition_assign (srdecl *decl, vec<srdecl *> &worklist); ++ void check_definition_call (srdecl *decl, vec<srdecl *> &worklist); + void check_definition (srdecl *decl, vec<srdecl *> &); + void check_uses (srdecl *decl, vec<srdecl *> &); + void check_use (srdecl *decl, gimple *stmt, vec<srdecl *> 
&); +- void check_type_and_push (tree newdecl, srtype *type, ++ void check_type_and_push (tree newdecl, srdecl *decl, + vec<srdecl *> &worklist, gimple *stmt); + void check_other_side (srdecl *decl, tree other, gimple *stmt, + vec<srdecl *> &worklist); ++ void check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt); + + void find_vars (gimple *stmt); + void find_var (tree expr, gimple *stmt); +@@ -1703,9 +1861,42 @@ ipa_struct_reorg::dump_types (FILE *f) + srtype *type; + FOR_EACH_VEC_ELT (types, i, type) + { ++ fprintf (f, "======= the %dth type: ======\n", i); + type->dump (f); ++ fprintf (f, "\n"); ++ } ++} ++ ++/* Dump all of the created newtypes to file F. */ ++ ++void ++ipa_struct_reorg::dump_newtypes (FILE *f) ++{ ++ unsigned i = 0; ++ srtype *type = NULL; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ continue; ++ fprintf (f, "======= the %dth newtype: ======\n", i); ++ fprintf (f, "type : "); ++ print_generic_expr (f, type->newtype0); ++ fprintf (f, "(%d) ", TYPE_UID (type->newtype0)); ++ fprintf (f, "{ "); ++ fprintf (f, "\nfields = {\n"); ++ ++ for (tree field = TYPE_FIELDS (TYPE_MAIN_VARIANT (type->newtype0)); ++ field; field = DECL_CHAIN (field)) ++ { ++ fprintf (f, "field (%d) ", DECL_UID (field)); ++ fprintf (f, "{"); ++ fprintf (f, "type = "); ++ print_generic_expr (f, TREE_TYPE (field)); ++ fprintf (f, "}\n"); ++ } ++ fprintf (f, "}\n "); ++ fprintf (f, "\n"); + } +- fprintf (f, "\n"); + } + + /* Dump all of the recorded types to file F. */ +@@ -1803,6 +1994,8 @@ isarraytype (tree type) + static bool + isptrptr (tree type) + { ++ if (type == NULL) ++ return false; + bool firstptr = false; + while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) + { +@@ -1817,154 +2010,740 @@ isptrptr (tree type) + return false; + } + +-/* Return the escape type which corresponds to if +- this is an volatile type, an array type or a pointer +- to a pointer type. */ ++/* Adding node to map and stack. 
*/ + +-static escape_type +-escape_type_volatile_array_or_ptrptr (tree type) ++bool ++add_node (tree node, int layers, hash_map <tree, int> &map, ++ auto_vec <tree> &stack) + { +- if (isvolatile_type (type)) +- return escape_volatile; +- if (isarraytype (type)) +- return escape_array; +- if (isptrptr (type)) +- return escape_ptr_ptr; +- return does_not_escape; ++ if (TREE_CODE (node) != SSA_NAME) ++ return false; ++ if (map.get (node) == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, " "); ++ fprintf (dump_file, "add node: \t\t"); ++ print_generic_expr (dump_file, node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", layers); ++ } ++ map.put (node, layers); ++ stack.safe_push (node); ++ } ++ else if (*map.get (node) != layers) ++ return false; ++ return true; + } + +-/* Record TYPE if not already recorded. */ ++/* Check the number of pointer layers of the gimple phi in definition. */ + +-srtype * +-ipa_struct_reorg::record_type (tree type) ++bool ++check_def_phi (tree def_node, hash_map <tree, int> &ptr_layers) + { +- unsigned typeuid; +- +- /* Get the main variant as we are going +- to record that type only. */ +- type = TYPE_MAIN_VARIANT (type); +- typeuid = TYPE_UID (type); ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ for (unsigned j = 0; j < gimple_phi_num_args (def_stmt); j++) ++ { ++ tree phi_node = gimple_phi_arg_def (def_stmt, j); ++ if (integer_zerop (phi_node)) ++ continue; ++ if (ptr_layers.get (phi_node) == NULL) ++ return false; ++ res &= *ptr_layers.get (def_node) == *ptr_layers.get (phi_node); ++ } ++ return res; ++} + +- srtype *type1; ++/* Check the number of pointer layers of the gimple assign in definition. 
*/ + +- type1 = find_type (type); +- if (type1) +- return type1; ++bool ++check_def_assign (tree def_node, hash_map <tree, int> &ptr_layers) ++{ ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (def_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (def_stmt); ++ tree rhs1 = gimple_assign_rhs1 (def_stmt); ++ tree rhs1_base = TREE_CODE (rhs1) == MEM_REF ? TREE_OPERAND (rhs1, 0) : rhs1; ++ if (ptr_layers.get (rhs1_base) == NULL) ++ return false; ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ { ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else if (TREE_CODE (rhs1) == MEM_REF) ++ res = *ptr_layers.get (def_node) ++ == *ptr_layers.get (TREE_OPERAND (rhs1, 0)); ++ else ++ { ++ return false; ++ } ++ } ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ { ++ if (rhs_code == POINTER_PLUS_EXPR) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else if (rhs_code == BIT_AND_EXPR) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else ++ return false; ++ } ++ else ++ return false; ++ return res; ++} + +- /* If already done recording just return NULL. */ +- if (done_recording) +- return NULL; ++/* Check node definition. */ + ++bool ++check_node_def (hash_map <tree, int> &ptr_layers) ++{ ++ bool res = true; + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "Recording new type: %u.\n", typeuid); +- +- type1 = new srtype (type); +- types.safe_push (type1); +- +- /* If the type has an user alignment set, +- that means the user most likely already setup the type. 
*/ +- if (TYPE_USER_ALIGN (type)) +- type1->mark_escape (escape_user_alignment, NULL); +- +- for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ fprintf (dump_file, "\n======== check node definition ========\n"); ++ for (unsigned i = 1; i < num_ssa_names; ++i) + { +- if (TREE_CODE (field) == FIELD_DECL) ++ tree name = ssa_name (i); ++ if (name && ptr_layers.get (name) != NULL) + { +- tree t = TREE_TYPE (field); +- process_union (t); +- if (TREE_CODE (inner_type (t)) == UNION_TYPE +- || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) +- type1->mark_escape (escape_union, NULL); +- if (isvolatile_type (t)) +- type1->mark_escape (escape_volatile, NULL); +- escape_type e = escape_type_volatile_array_or_ptrptr (t); +- if (e != does_not_escape) +- type1->mark_escape (e, NULL); +- if (handled_type (t)) +- { +- srtype *t1 = record_type (inner_type (t)); +- srfield *f = type1->find_field (int_byte_position (field)); +- /* We might have an variable sized type which +- we don't set the handle. */ +- if (f) +- { +- f->type = t1; +- t1->add_field_site (f); +- } +- if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) +- type1->mark_escape (escape_rescusive_type, NULL); +- } ++ gimple *def_stmt = SSA_NAME_DEF_STMT (name); ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (def_stmt) != GIMPLE_DEBUG) ++ print_gimple_stmt (dump_file, def_stmt, 0); ++ ++ if (gimple_code (def_stmt) == GIMPLE_PHI) ++ res = check_def_phi (name, ptr_layers); ++ else if (gimple_code (def_stmt) == GIMPLE_ASSIGN) ++ res = check_def_assign (name, ptr_layers); ++ else if (gimple_code (def_stmt) == GIMPLE_NOP) ++ continue; ++ else ++ return false; + } + } ++ return res; ++} + +- return type1; ++/* Check pointer usage. 
*/ ++ ++bool ++check_record_ptr_usage (gimple *use_stmt, tree &current_node, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ if (rhs_class != GIMPLE_SINGLE_RHS ++ || (TREE_CODE (rhs1) != COMPONENT_REF && TREE_CODE (rhs1) != SSA_NAME) ++ || (TREE_CODE (lhs) != MEM_REF && TREE_CODE (lhs) != SSA_NAME)) ++ return false; ++ ++ bool res = true; ++ /* MEM(long int *)a_1 = _1; (record). ++ If lhs is ssa_name, lhs cannot be the current node. ++ _2 = _1->flow; (No record). */ ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ tree tmp = (rhs1 != current_node) ? rhs1 : lhs; ++ if (TREE_CODE (tmp) == MEM_REF) ++ res = add_node (TREE_OPERAND (tmp, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ else ++ res = add_node (tmp, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == COMPONENT_REF) ++ res = !(POINTER_TYPE_P (TREE_TYPE (rhs1))); ++ else ++ res = false; ++ return res; + } + +-/* Mark TYPE as escaping with ESCAPES as the reason. */ ++/* Check and record a single node. 
*/ + +-void +-ipa_struct_reorg::mark_type_as_escape (tree type, +- escape_type escapes, +- gimple *stmt) ++bool ++check_record_single_node (gimple *use_stmt, tree &current_node, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack) + { +- if (handled_type (type)) +- { +- srtype *stype = record_type (inner_type (type)); ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS); + +- if (!stype) +- return; ++ if ((TREE_CODE (rhs1) != SSA_NAME && TREE_CODE (rhs1) != MEM_REF) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (lhs) != MEM_REF)) ++ return false; + +- stype->mark_escape (escapes, stmt); ++ bool res = true; ++ if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF) ++ /* Add such as: _2 = MEM(struct arc_t * *)_1. */ ++ res = add_node (lhs, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); ++ else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME) ++ { ++ /* Add such as: MEM(long int *)a_1 = _1. */ ++ if (rhs1 == current_node) ++ res = add_node (TREE_OPERAND (lhs, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ else ++ res = add_node (rhs1, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); + } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == SSA_NAME) ++ res = add_node (lhs, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else ++ res = false; ++ ++ return res; + } + +-/* Maybe process the union of type TYPE, such that marking all of the fields' +- types as being escaping. */ ++/* Check and record multiple nodes. 
*/ + +-void +-ipa_struct_reorg::process_union (tree type) ++bool ++check_record_mult_node (gimple *use_stmt, tree &current_node, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack) + { +- static hash_set<tree> unions_recorded; ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ tree rhs2 = gimple_assign_rhs2 (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_BINARY_RHS); ++ ++ if ((rhs_code != POINTER_PLUS_EXPR && rhs_code != POINTER_DIFF_EXPR ++ && rhs_code != BIT_AND_EXPR) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (rhs1) != SSA_NAME)) ++ return false; + +- type = inner_type (type); +- if (TREE_CODE (type) != UNION_TYPE +- && TREE_CODE (type) != QUAL_UNION_TYPE) +- return; ++ bool res = true; ++ if (rhs_code == POINTER_PLUS_EXPR) ++ res = add_node (lhs == current_node ? rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else if (rhs_code == POINTER_DIFF_EXPR) ++ res = add_node (rhs1 != current_node ? rhs1 : rhs2, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else if (rhs_code == BIT_AND_EXPR) ++ { ++ if (TREE_CODE (rhs2) != INTEGER_CST) ++ return false; ++ res = add_node (lhs == current_node ? rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ return res; ++} + +- type = TYPE_MAIN_VARIANT (type); ++/* Check whether gimple assign is correctly used and record node. */ + +- /* We already processed this type. 
*/ +- if (unions_recorded.add (type)) +- return; ++bool ++check_record_assign (tree &current_node, gimple *use_stmt, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ if (*ptr_layers.get (current_node) == 1) ++ return check_record_ptr_usage (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ else if (*ptr_layers.get (current_node) > 1) ++ { ++ if (rhs_class != GIMPLE_BINARY_RHS ++ && rhs_class != GIMPLE_UNARY_RHS ++ && rhs_class != GIMPLE_SINGLE_RHS) ++ return false; + +- for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ return check_record_single_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ return check_record_mult_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ } ++ else ++ return false; ++ ++ return true; ++} ++ ++/* Check whether gimple phi is correctly used and record node. */ ++ ++bool ++check_record_phi (tree &current_node, gimple *use_stmt, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack) ++{ ++ bool res = true; ++ res &= add_node (gimple_phi_result (use_stmt), *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ ++ for (unsigned i = 0; i < gimple_phi_num_args (use_stmt); i++) + { +- if (TREE_CODE (field) == FIELD_DECL) +- { +- mark_type_as_escape (TREE_TYPE (field), escape_union); +- process_union (TREE_TYPE (field)); +- } ++ if (integer_zerop (gimple_phi_arg_def (use_stmt, i))) ++ continue; ++ res &= add_node (gimple_phi_arg_def (use_stmt, i), ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); + } ++ return res; + } + +-/* Used by record_var function as a callback to walk_tree. +- Mark the type as escaping if it has expressions which +- cannot be converted for global initializations. */ ++/* Check the use of callee. 
*/ + +-static tree +-record_init_types (tree *tp, int *walk_subtrees, void *data) ++bool ++check_callee (cgraph_node *node, gimple *stmt, ++ hash_map <tree, int> &ptr_layers, int input_layers) + { +- ipa_struct_reorg *c = (ipa_struct_reorg *)data; +- switch (TREE_CODE (*tp)) ++ /* caller main () ++ { spec_qsort.constprop (_649, _651); } ++ def spec_qsort.constprop (void * a, size_t n) ++ { spec_qsort.constprop (a_1, _139); } */ ++ /* In safe functions, only call itself is allowed. */ ++ if (node->get_edge (stmt)->callee != node) ++ return false; ++ tree input_node = gimple_call_arg (stmt, 0); ++ if (ptr_layers.get (input_node) == NULL ++ || *ptr_layers.get (input_node) != input_layers) ++ return false; ++ if (SSA_NAME_VAR (input_node) != DECL_ARGUMENTS (node->decl)) ++ return false; ++ ++ for (unsigned i = 1; i < gimple_call_num_args (stmt); i++) + { +- CASE_CONVERT: +- case COMPONENT_REF: +- case VIEW_CONVERT_EXPR: +- case ARRAY_REF: +- { +- tree typeouter = TREE_TYPE (*tp); +- tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); +- c->mark_type_as_escape (typeouter, escape_via_global_init); ++ if (ptr_layers.get (gimple_call_arg (stmt, i)) != NULL) ++ return false; ++ } ++ return true; ++} ++ ++/* Check the usage of input nodes and related nodes. */ ++ ++bool ++check_node_use (cgraph_node *node, tree current_node, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack, ++ int input_layers) ++{ ++ imm_use_iterator imm_iter; ++ gimple *use_stmt = NULL; ++ bool res = true; ++ /* Use FOR_EACH_IMM_USE_STMT as an indirect edge ++ to search for possible related nodes and push to stack. */ ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, current_node) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (use_stmt) != GIMPLE_DEBUG) ++ { ++ fprintf (dump_file, "%*s", 4, ""); ++ print_gimple_stmt (dump_file, use_stmt, 0); ++ } ++ /* For other types of gimple, do not record the node. 
*/ ++ if (res) ++ { ++ if (gimple_code (use_stmt) == GIMPLE_PHI) ++ res = check_record_phi (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ else if (gimple_code (use_stmt) == GIMPLE_ASSIGN) ++ res = check_record_assign (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ else if (gimple_code (use_stmt) == GIMPLE_CALL) ++ res = check_callee (node, use_stmt, ptr_layers, input_layers); ++ else if (gimple_code (use_stmt) == GIMPLE_RETURN) ++ res = false; ++ } ++ } ++ return res; ++} ++ ++/* Trace the pointer layers of void node. */ ++ ++bool ++get_void_node_ptr_layers (tree input, int &input_layers) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "input type is void* node\n"); ++ imm_use_iterator imm_iter; ++ gimple *use_stmt = NULL; ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, input) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ print_gimple_stmt (dump_file, use_stmt, 0); ++ if (gimple_code (use_stmt) == GIMPLE_ASSIGN ++ && gimple_assign_rhs_class (use_stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ if (TREE_CODE (lhs) == SSA_NAME && handled_type (TREE_TYPE (lhs))) ++ { ++ if (TREE_CODE (rhs1) == MEM_REF) ++ { ++ input_layers = get_ptr_layers (TREE_TYPE (lhs)) + 1; ++ return true; ++ } ++ } ++ } ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "end trace pointer layers of void* node\n"); ++ return false; ++} ++ ++/* Preparing the First Node for DFS. */ ++ ++bool ++set_init_node (cgraph_node *node, cgraph_edge *caller, ++ hash_map <tree, int> &ptr_layers, ++ auto_vec <tree> &ssa_name_stack, int &input_layers) ++{ ++ /* Set input_layer ++ caller spec_qsort.constprop (_649, _651) ++ |-- Obtains the actual ptr layer ++ from the input node. 
*/ ++ caller->caller->get_untransformed_body (); ++ if (caller->call_stmt == NULL ++ || gimple_call_num_args (caller->call_stmt) == 0) ++ return false; ++ tree input = gimple_call_arg (caller->call_stmt, 0); ++ if (!(POINTER_TYPE_P (TREE_TYPE (input)) ++ || TREE_CODE (TREE_TYPE (input)) == ARRAY_TYPE)) ++ return false; ++ if (handled_type (TREE_TYPE (input))) ++ input_layers = get_ptr_layers (TREE_TYPE (input)); ++ else ++ { ++ if (VOID_POINTER_P (TREE_TYPE (input))) ++ { ++ if (!get_void_node_ptr_layers (input, input_layers)) ++ return false; ++ } ++ } ++ ++ /* Set initial node ++ def spec_qsort.constprop (void * a, size_t n) ++ |-- Find the initial ssa_name ++ from the parameter node. */ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ continue; ++ if (SSA_NAME_VAR (name) == parm ++ && gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_NOP) ++ { ++ if (!add_node (name, input_layers, ptr_layers, ssa_name_stack)) ++ return false; ++ } ++ } ++ return !ssa_name_stack.is_empty (); ++} ++ ++/* Check the usage of each call. 
*/ ++ ++bool ++check_each_call (cgraph_node *node, cgraph_edge *caller) ++{ ++ hash_map <tree, int> ptr_layers; ++ auto_vec <tree> ssa_name_stack; ++ int input_layers = 0; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "======== check each call : %s/%u ========\n", ++ node->name (), node->order); ++ if (!set_init_node (node, caller, ptr_layers, ssa_name_stack, input_layers)) ++ return false; ++ int i = 0; ++ while (!ssa_name_stack.is_empty ()) ++ { ++ tree current_node = ssa_name_stack.pop (); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncur node %d: \t", i++); ++ print_generic_expr (dump_file, current_node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", ++ *ptr_layers.get (current_node)); ++ } ++ if (get_ptr_layers (TREE_TYPE (current_node)) ++ > *ptr_layers.get (current_node)) ++ return false; ++ if (!check_node_use (node, current_node, ptr_layers, ssa_name_stack, ++ input_layers)) ++ return false; ++ } ++ ++ if (!check_node_def (ptr_layers)) ++ return false; ++ return true; ++} ++ ++/* Filter out function: void func (void*, int n), ++ and the function has no static variable, no structure-related variable, ++ and no global variable is used. 
*/ ++ ++bool ++filter_func (cgraph_node *node) ++{ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ if (!(parm && VOID_POINTER_P (TREE_TYPE (parm)) ++ && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (node->decl))))) ++ return false; ++ ++ for (parm = DECL_CHAIN (parm); parm; parm = DECL_CHAIN (parm)) ++ { ++ if (TREE_CODE (TREE_TYPE (parm)) != INTEGER_TYPE) ++ return false; ++ } ++ ++ if (DECL_STRUCT_FUNCTION (node->decl)->static_chain_decl) ++ return false; ++ ++ tree var = NULL_TREE; ++ unsigned int i = 0; ++ bool res = true; ++ FOR_EACH_LOCAL_DECL (cfun, i, var) ++ { ++ if (TREE_CODE (var) == VAR_DECL && handled_type (TREE_TYPE (var))) ++ res = false; ++ } ++ if (!res) ++ return false; ++ ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ continue; ++ tree var = SSA_NAME_VAR (name); ++ if (var && TREE_CODE (var) == VAR_DECL && is_global_var (var)) ++ return false; ++ } ++ return true; ++} ++ ++/* Check whether the function with the void* parameter and uses the input node ++ safely. ++ In these functions only component_ref can be used to dereference the last ++ layer of the input structure pointer. The hack operation pointer offset ++ after type cast cannot be used. 
++*/ ++ ++bool ++is_safe_func_with_void_ptr_parm (cgraph_node *node) ++{ ++ if (!filter_func (node)) ++ return false; ++ ++ /* Distinguish Recursive Callers ++ normal_callers: main () ++ { spec_qsort.constprop (_649, _651); } ++ definition: spec_qsort.constprop (void * a, size_t n) ++ recursive_callers: { spec_qsort.constprop (a_1, _139); } */ ++ auto_vec<cgraph_edge *> callers = node->collect_callers (); ++ auto_vec<cgraph_edge *> normal_callers; ++ for (unsigned i = 0; i < callers.length (); i++) ++ { ++ if (callers[i]->caller != node) ++ normal_callers.safe_push (callers[i]); ++ } ++ if (normal_callers.length () == 0) ++ return false; ++ ++ for (unsigned i = 0; i < normal_callers.length (); i++) ++ { ++ if (!check_each_call (node, normal_callers[i])) ++ return false; ++ } ++ return true; ++} ++ ++/* Return the escape type which corresponds to if ++ this is an volatile type, an array type or a pointer ++ to a pointer type. */ ++ ++static escape_type ++escape_type_volatile_array_or_ptrptr (tree type) ++{ ++ if (isvolatile_type (type)) ++ return escape_volatile; ++ if (isarraytype (type)) ++ return escape_array; ++ if (isptrptr (type) && (current_mode != STRUCT_REORDER_FIELDS)) ++ return escape_ptr_ptr; ++ return does_not_escape; ++} ++ ++/* Record field type. */ ++ ++void ++ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) ++{ ++ tree field_type = TREE_TYPE (field); ++ /* The uid of the type in the structure is different ++ from that outside the structure. */ ++ srtype *field_srtype = record_type (inner_type (field_type)); ++ srfield *field_srfield = base_srtype->find_field (int_byte_position (field)); ++ /* We might have an variable sized type which we don't set the handle. 
*/ ++ if (field_srfield) ++ { ++ field_srfield->type = field_srtype; ++ field_srtype->add_field_site (field_srfield); ++ } ++ if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT ++ && current_mode != STRUCT_REORDER_FIELDS) ++ base_srtype->mark_escape (escape_rescusive_type, NULL); ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ field_srtype->mark_escape (escape_instance_field, NULL); ++} ++ ++/* Record structure all field types. */ ++ ++void ++ipa_struct_reorg::record_struct_field_types (tree base_type, ++ srtype *base_srtype) ++{ ++ for (tree field = TYPE_FIELDS (base_type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree field_type = TREE_TYPE (field); ++ process_union (field_type); ++ if (TREE_CODE (inner_type (field_type)) == UNION_TYPE ++ || TREE_CODE (inner_type (field_type)) == QUAL_UNION_TYPE) ++ base_srtype->mark_escape (escape_union, NULL); ++ if (isvolatile_type (field_type)) ++ base_srtype->mark_escape (escape_volatile, NULL); ++ escape_type e = escape_type_volatile_array_or_ptrptr (field_type); ++ if (e != does_not_escape) ++ base_srtype->mark_escape (e, NULL); ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ base_srtype->mark_escape (escape_instance_field, NULL); ++ if (handled_type (field_type)) ++ record_field_type (field, base_srtype); ++ } ++ } ++} ++ ++/* Record TYPE if not already recorded. */ ++ ++srtype * ++ipa_struct_reorg::record_type (tree type) ++{ ++ unsigned typeuid; ++ ++ /* Get the main variant as we are going ++ to record that type only. 
*/ ++ type = TYPE_MAIN_VARIANT (type); ++ typeuid = TYPE_UID (type); ++ ++ srtype *type1; ++ ++ type1 = find_type (type); ++ if (type1) ++ return type1; ++ ++ /* If already done recording just return NULL. */ ++ if (done_recording) ++ return NULL; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Recording new type: %u.\n", typeuid); ++ const char *type_name = get_type_name (type); ++ if (type_name == NULL) ++ fprintf (dump_file, "Recording new type NULL name\n"); ++ else ++ fprintf (dump_file, "Recording new type name: %s.\n", type_name); ++ } ++ ++ type1 = new srtype (type); ++ types.safe_push (type1); ++ ++ /* If the type has an user alignment set, ++ that means the user most likely already setup the type. */ ++ if (TYPE_USER_ALIGN (type)) ++ type1->mark_escape (escape_user_alignment, NULL); ++ ++ record_struct_field_types (type, type1); ++ ++ return type1; ++} ++ ++/* Mark TYPE as escaping with ESCAPES as the reason. */ ++ ++void ++ipa_struct_reorg::mark_type_as_escape (tree type, ++ escape_type escapes, ++ gimple *stmt) ++{ ++ if (handled_type (type)) ++ { ++ srtype *stype = record_type (inner_type (type)); ++ ++ if (!stype) ++ return; ++ ++ stype->mark_escape (escapes, stmt); ++ } ++} ++ ++/* Maybe process the union of type TYPE, such that marking all of the fields' ++ types as being escaping. */ ++ ++void ++ipa_struct_reorg::process_union (tree type) ++{ ++ static hash_set<tree> unions_recorded; ++ ++ type = inner_type (type); ++ if (TREE_CODE (type) != UNION_TYPE ++ && TREE_CODE (type) != QUAL_UNION_TYPE) ++ return; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ /* We already processed this type. 
*/ ++ if (unions_recorded.add (type)) ++ return; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ mark_type_as_escape (TREE_TYPE (field), escape_union); ++ process_union (TREE_TYPE (field)); ++ } ++ } ++} ++ ++/* Used by record_var function as a callback to walk_tree. ++ Mark the type as escaping if it has expressions which ++ cannot be converted for global initializations. */ ++ ++static tree ++record_init_types (tree *tp, int *walk_subtrees, void *data) ++{ ++ ipa_struct_reorg *c = (ipa_struct_reorg *)data; ++ switch (TREE_CODE (*tp)) ++ { ++ CASE_CONVERT: ++ case COMPONENT_REF: ++ case VIEW_CONVERT_EXPR: ++ case ARRAY_REF: ++ { ++ tree typeouter = TREE_TYPE (*tp); ++ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); ++ c->mark_type_as_escape (typeouter, escape_via_global_init); + c->mark_type_as_escape (typeinner, escape_via_global_init); + break; + } +@@ -1996,6 +2775,8 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + process_union (TREE_TYPE (decl)); + ++ /* Only the structure type RECORD_TYPE is recorded. ++ Therefore, the void* type is filtered out. */ + if (handled_type (TREE_TYPE (decl))) + { + type = record_type (inner_type (TREE_TYPE (decl))); +@@ -2035,7 +2816,8 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + /* Separate instance is hard to trace in complete struct + relayout optimization. 
*/ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT ++ || current_mode == STRUCT_REORDER_FIELDS) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + e = escape_separate_instance; + +@@ -2078,11 +2860,9 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + { + tree r = TREE_OPERAND (expr, 0); + tree orig_type = TREE_TYPE (expr); +- if (handled_component_p (r) +- || TREE_CODE (r) == MEM_REF) ++ if (handled_component_p (r) || TREE_CODE (r) == MEM_REF) + { +- while (handled_component_p (r) +- || TREE_CODE (r) == MEM_REF) ++ while (handled_component_p (r) || TREE_CODE (r) == MEM_REF) + { + if (TREE_CODE (r) == VIEW_CONVERT_EXPR) + { +@@ -2114,8 +2894,10 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; ++ /* The should_create flag is true, the declaration can be recorded. */ + get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address, true, true); ++ realpart, imagpart, address, escape_from_base, true, true); + } + + void +@@ -2132,36 +2914,79 @@ ipa_struct_reorg::find_vars (gimple *stmt) + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + find_var (gimple_assign_lhs (stmt), stmt); ++ /* _2 = MEM(struct arc_t * *)_1; ++ records the right value _1 declaration. */ + find_var (gimple_assign_rhs1 (stmt), stmt); +- if (TREE_CODE (lhs) == SSA_NAME ++ ++ /* Add a safe func mechanism. 
*/ ++ bool l_find = true; ++ bool r_find = true; ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ l_find = !(current_function->is_safe_func ++ && TREE_CODE (lhs) == SSA_NAME ++ && is_from_void_ptr_parm (lhs)); ++ r_find = !(current_function->is_safe_func ++ && TREE_CODE (rhs) == SSA_NAME ++ && is_from_void_ptr_parm (rhs)); ++ } ++ ++ if ((TREE_CODE (lhs) == SSA_NAME) + && VOID_POINTER_P (TREE_TYPE (lhs)) +- && handled_type (TREE_TYPE (rhs))) ++ && handled_type (TREE_TYPE (rhs)) && l_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (rhs))); + srdecl *d = find_decl (lhs); + if (!d && t) + { +- current_function->record_decl (t, lhs, -1); ++ current_function->record_decl (t, lhs, -1, ++ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL); + tree var = SSA_NAME_VAR (lhs); + if (var && VOID_POINTER_P (TREE_TYPE (var))) +- current_function->record_decl (t, var, -1); ++ current_function->record_decl (t, var, -1, ++ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL); + } + } ++ /* Find void ssa_name such as: ++ void * _1; struct arc * _2; ++ _2 = _1 + _3; _1 = calloc (100, 40). */ + if (TREE_CODE (rhs) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (rhs)) +- && handled_type (TREE_TYPE (lhs))) ++ && handled_type (TREE_TYPE (lhs)) && r_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (lhs))); + srdecl *d = find_decl (rhs); + if (!d && t) + { +- current_function->record_decl (t, rhs, -1); ++ current_function->record_decl (t, rhs, -1, ++ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL); + tree var = SSA_NAME_VAR (rhs); + if (var && VOID_POINTER_P (TREE_TYPE (var))) +- current_function->record_decl (t, var, -1); ++ current_function->record_decl (t, var, -1, ++ isptrptr (TREE_TYPE (lhs)) ? 
TREE_TYPE (lhs) : NULL); + } + } + } ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_assign_rhs_code (stmt) == LE_EXPR ++ || gimple_assign_rhs_code (stmt) == LT_EXPR ++ || gimple_assign_rhs_code (stmt) == GE_EXPR ++ || gimple_assign_rhs_code (stmt) == GT_EXPR)) ++ { ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } ++ /* Find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. */ ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR ++ && types_compatible_p ( ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs2 (stmt))))) ++ { ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } + else + { + /* Because we won't handle these stmts in rewrite phase, +@@ -2232,27 +3057,134 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + +-/* Maybe record access of statement for further analaysis. */ ++/* Maybe record access of statement for further analaysis. */ ++ ++void ++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ maybe_record_assign (node, as_a <gassign *> (stmt)); ++ break; ++ case GIMPLE_CALL: ++ maybe_record_call (node, as_a <gcall *> (stmt)); ++ break; ++ case GIMPLE_DEBUG: ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ default: ++ break; ++ } ++} ++ ++/* Calculate the multiplier. 
*/ ++ ++static bool ++calculate_mult_num (tree arg, tree *num, tree struct_size) ++{ ++ gcc_assert (TREE_CODE (arg) == INTEGER_CST); ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) ++ { ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) ++ { ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ number = build_int_cst (TREE_TYPE (number), -tree_to_shwi (number)); ++ *num = number; ++ return true; ++ } ++ return false; ++} ++ ++/* Trace and calculate the multiplier of PLUS_EXPR. */ ++ ++static bool ++trace_calculate_plus (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR); ++ ++ tree num1 = NULL_TREE; ++ tree num2 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ return false; ++ if (!is_result_of_mult (arg1, &num2, struct_size) || num2 == NULL_TREE) ++ return false; ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of MULT_EXPR. */ ++ ++static bool ++trace_calculate_mult (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR); ++ ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1 = NULL_TREE; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Trace and calculate the multiplier of NEGATE_EXPR. 
*/ ++ ++static bool ++trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR); ++ ++ /* Support NEGATE_EXPR trace: _3 = -_2; _2 = _1 * 72. */ ++ tree num1 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ return false; ++ tree num0 = build_int_cst (TREE_TYPE (num1), -1); ++ *num = size_binop (MULT_EXPR, num0, num1); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of POINTER_DIFF_EXPR. */ + +-void +-ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++static bool ++trace_calculate_diff (gimple *size_def_stmt, tree *num) + { +- switch (gimple_code (stmt)) ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR); ++ ++ /* Support POINTER_DIFF_EXPR trace: ++ _3 = (long unsigned int) _2; _2 = _1 - old_arcs_1. */ ++ tree arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ if (size_def_stmt && is_gimple_assign (size_def_stmt) ++ && gimple_assign_rhs_code (size_def_stmt) == POINTER_DIFF_EXPR) + { +- case GIMPLE_ASSIGN: +- maybe_record_assign (node, as_a <gassign *> (stmt)); +- break; +- case GIMPLE_CALL: +- maybe_record_call (node, as_a <gcall *> (stmt)); +- break; +- case GIMPLE_DEBUG: +- break; +- case GIMPLE_GOTO: +- case GIMPLE_SWITCH: +- break; +- default: +- break; ++ *num = NULL_TREE; ++ return true; + } ++ *num = NULL_TREE; ++ return false; + } + + /* This function checks whether ARG is a result of multiplication +@@ -2269,26 +3201,8 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + + /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. 
*/ + if (TREE_CODE (arg) == INTEGER_CST) +- { +- bool sign = false; +- HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); +- if (size < 0) +- { +- size = -size; +- sign = true; +- } +- tree arg2 = build_int_cst (TREE_TYPE (arg), size); +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) +- { +- tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); +- if (sign) +- number = build_int_cst (TREE_TYPE (number), +- -tree_to_shwi (number)); +- *num = number; +- return true; +- } +- return false; +- } ++ return calculate_mult_num (arg, num, struct_size); ++ + gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); + + /* If the allocation statement was of the form +@@ -2304,43 +3218,20 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + return false; + + // FIXME: this should handle SHIFT also. +- if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) +- { +- tree num1, num2; +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- if (!is_result_of_mult (arg0, &num1, struct_size)) +- return false; +- if (!is_result_of_mult (arg1, &num2, struct_size)) +- return false; +- *num = size_binop (PLUS_EXPR, num1, num2); +- return true; +- } +- else if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) +- { +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- tree num1; +- +- if (is_result_of_mult (arg0, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg1, num1); +- return true; +- } +- if (is_result_of_mult (arg1, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg0, num1); +- return true; +- } +- +- *num = NULL_TREE; +- return false; +- } +- else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ tree_code rhs_code = gimple_assign_rhs_code (size_def_stmt); ++ if (rhs_code == PLUS_EXPR) ++ return trace_calculate_plus (size_def_stmt, num, struct_size); ++ else if (rhs_code == MULT_EXPR) ++ return trace_calculate_mult 
(size_def_stmt, num, struct_size); ++ else if (rhs_code == SSA_NAME) + { + arg = gimple_assign_rhs1 (size_def_stmt); + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } ++ else if (rhs_code == NEGATE_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ return trace_calculate_negate (size_def_stmt, num, struct_size); ++ else if (rhs_code == NOP_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ return trace_calculate_diff (size_def_stmt, num); + else + { + *num = NULL_TREE; +@@ -2357,18 +3248,22 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) ++ return true; ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- +- if (current_mode != COMPLETE_STRUCT_RELAYOUT) +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) +- return true; ++ if ((current_mode == NORMAL) ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN))) ++ return true; + return false; + } + +@@ -2376,7 +3271,7 @@ ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + elements in the array allocated. 
*/ + + tree +-ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++ipa_struct_reorg::allocate_size (srtype *type, srdecl *decl, gimple *stmt) + { + if (!stmt + || gimple_code (stmt) != GIMPLE_CALL +@@ -2396,6 +3291,10 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + + tree struct_size = TYPE_SIZE_UNIT (type->type); + ++ /* Specify the correct size to relax multi-layer pointer. */ ++ if (TREE_CODE (decl->decl) == SSA_NAME && isptrptr (decl->orig_type)) ++ struct_size = TYPE_SIZE_UNIT (decl->orig_type); ++ + tree size = gimple_call_arg (stmt, 0); + + if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +@@ -2409,8 +3308,10 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + the size of structure. */ + if (operand_equal_p (arg1, struct_size, 0)) + return size; +- /* ??? Check that first argument is a constant equal to +- the size of structure. */ ++ /* ??? Check that first argument is a constant ++ equal to the size of structure. */ ++ /* If the allocated number is equal to the value of struct_size, ++ the value of arg1 is changed to the allocated number. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2453,10 +3354,16 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + + if (!d) + { ++ /* MEM(struct arc *)_1.head = _2; _2 = calloc (100, 104). */ + if (VOID_POINTER_P (TREE_TYPE (side)) + && TREE_CODE (side) == SSA_NAME) +- current_function->record_decl (type, side, -1); ++ { ++ /* The type is other, the declaration is side. */ ++ current_function->record_decl (type, side, -1, ++ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL); ++ } + else ++ /* *_1 = &MEM(void *)&x + 8B. 
*/ + type->mark_escape (escape_cast_another_ptr, stmt); + } + else if (type != d->type) +@@ -2464,6 +3371,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + type->mark_escape (escape_cast_another_ptr, stmt); + d->type->mark_escape (escape_cast_another_ptr, stmt); + } ++ /* x_1 = y.x_nodes; void *x; ++ Directly mark the structure pointer type assigned ++ to the void* variable as escape. */ ++ else if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side)) ++ && SSA_NAME_VAR (side) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side)))) ++ mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ ++ check_ptr_layers (side, other, stmt); + } + + /* Record accesses in an assignment statement STMT. */ +@@ -2486,8 +3404,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + if (!handled_type (TREE_TYPE (lhs))) + return; + /* Check if rhs2 is a multiplication of the size of the type. */ ++ /* The size adjustment and judgment of multi-layer pointers ++ are added. */ + if (is_result_of_mult (rhs2, &num, +- TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ isptrptr (TREE_TYPE (lhs)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (lhs)) ++ : TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) + { + record_stmt_expr (lhs, node, stmt); + record_stmt_expr (rhs1, node, stmt); +@@ -2525,9 +3447,8 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + } + + static bool +-check_mem_ref_offset (tree expr) ++check_mem_ref_offset (tree expr, tree *num) + { +- tree num = NULL; + bool ret = false; + + if (TREE_CODE (expr) != MEM_REF) +@@ -2538,15 +3459,18 @@ check_mem_ref_offset (tree expr) + tree tmp = TREE_OPERAND (expr, 0); + if (TREE_CODE (tmp) == ADDR_EXPR) + tmp = TREE_OPERAND (tmp, 0); +- tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); +- ret = is_result_of_mult (field_off, &num, size); ++ /* Specify the correct size for the multi-layer pointer. 
*/ ++ tree size = isptrptr (TREE_TYPE (tmp)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (tmp)) ++ : TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, num, size); + return ret; + } + + static tree + get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + bool &realpart, bool &imagpart, +- tree &accesstype) ++ tree &accesstype, tree *num) + { + offset = 0; + realpart = false; +@@ -2569,22 +3493,29 @@ get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + { + case COMPONENT_REF: + { ++ /* x.a = _1; If expr is the lvalue of stmt, ++ then field type is FIELD_DECL - POINTER_TYPE - RECORD_TYPE. */ + tree field = TREE_OPERAND (expr, 1); + tree field_off = byte_position (field); + if (TREE_CODE (field_off) != INTEGER_CST) + return NULL; + offset += tree_to_shwi (field_off); ++ /* x.a = _1; If expr is the lvalue of stmt, ++ then expr type is VAR_DECL - RECORD_TYPE (fetch x) */ + expr = TREE_OPERAND (expr, 0); + accesstype = NULL; + break; + } + case MEM_REF: + { ++ /* _2 = MEM(struct s * *)_1; ++ If expr is the right value of stmt, then field_off type is ++ INTEGER_CST - POINTER_TYPE - POINTER_TYPE - RECORD_TYPE. */ + tree field_off = TREE_OPERAND (expr, 1); + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. 
*/ + accesstype = TREE_TYPE (field_off); +- if (!check_mem_ref_offset (expr)) ++ if (!check_mem_ref_offset (expr, num)) + offset += tree_to_uhwi (field_off); + return TREE_OPERAND (expr, 0); + } +@@ -2626,10 +3557,11 @@ ipa_struct_reorg::wholeaccess (tree expr, tree base, + bool + ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + srtype *&type, srfield *&field, +- bool &realpart, bool &imagpart, +- bool &address, bool should_create, ++ bool &realpart, bool &imagpart, bool &address, ++ bool &escape_from_base, bool should_create, + bool can_escape) + { ++ tree num = NULL_TREE; + HOST_WIDE_INT offset; + tree accesstype; + address = false; +@@ -2641,8 +3573,9 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + mark_as_bit_field = true; + } + ++ /* Ref is classified into two types: COMPONENT_REF or MER_REF. */ + base = get_ref_base_and_offset (expr, offset, realpart, imagpart, +- accesstype); ++ accesstype, &num); + + /* Variable access, unkown type. */ + if (base == NULL) +@@ -2680,6 +3613,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + if (!t) + return false; + } ++ /* If no such decl is finded ++ or orig_type is not added to this decl, then add it. */ + else if (!d && accesstype) + { + if (!should_create) +@@ -2691,15 +3626,52 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + t = record_type (inner_type (accesstype)); + if (!t || t->has_escaped ()) + return false; +- /* If base is not void* mark the type as escaping. */ +- if (!VOID_POINTER_P (TREE_TYPE (base))) ++ /* If base is not void* mark the type as escaping. ++ release INTEGER_TYPE cast to struct pointer. ++ (If t has escpaed above, then directly returns ++ and doesn't mark escape follow.). */ ++ /* _1 = MEM(struct arc_t * *)a_1. ++ then base a_1: ssa_name - pointer_type - integer_type. 
*/ ++ if (current_mode == STRUCT_REORDER_FIELDS) + { +- gcc_assert (can_escape); +- t->mark_escape (escape_cast_another_ptr, NULL); +- return false; ++ bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) ++ == INTEGER_TYPE); ++ if (!(VOID_POINTER_P (TREE_TYPE (base)) ++ || (current_function->is_safe_func && is_int_ptr))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME ++ && !(current_function->is_safe_func && is_int_ptr)) ++ { ++ /* Add a safe func mechanism. */ ++ if (!(current_function->is_safe_func ++ && is_from_void_ptr_parm (base))) ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? accesstype : NULL); ++ } ++ } ++ else ++ { ++ if (!(VOID_POINTER_P (TREE_TYPE (base)))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ { ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? 
accesstype : NULL); ++ } + } +- if (TREE_CODE (base) == SSA_NAME) +- current_function->record_decl (t, base, -1); + } + else if (!d) + return false; +@@ -2707,7 +3679,10 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + t = d->type; + + if (t->has_escaped ()) ++ { ++ escape_from_base = true; + return false; ++ } + + if (mark_as_bit_field) + { +@@ -2716,6 +3691,17 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + return false; + } + ++ /* Escape the operation of fetching field with pointer offset such as: ++ *(&(t->right)) = malloc (0); -> MEM(struct node * *)_1 + 8B = malloc (0); ++ */ ++ if (current_mode != NORMAL ++ && (TREE_CODE (expr) == MEM_REF) && (offset != 0)) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_non_multiply_size, NULL); ++ return false; ++ } ++ + if (wholeaccess (expr, base, accesstype, t)) + { + field = NULL; +@@ -2733,7 +3719,6 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + print_generic_expr (dump_file, expr); + fprintf (dump_file, "\n"); + print_generic_expr (dump_file, base); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2747,9 +3732,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + print_generic_expr (dump_file, f->fieldtype); + fprintf (dump_file, "\naccess type = "); + print_generic_expr (dump_file, TREE_TYPE (expr)); +- fprintf (dump_file, "original expr = "); ++ fprintf (dump_file, "\noriginal expr = "); + print_generic_expr (dump_file, expr); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2772,8 +3756,9 @@ ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes, + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; + if (!get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address)) ++ realpart, 
imagpart, address, escape_from_base)) + return; + + type->mark_escape (escapes, stmt); +@@ -2846,10 +3831,23 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + gimple_call_arg (stmt, i)); + if (d) + d->type->mark_escape (escapes, stmt); ++ ++ if (escapes == escape_external_function ++ && !gimple_call_builtin_p (stmt, BUILT_IN_MEMSET)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "escape_external_function: "); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ if (d) ++ ext_func_types.safe_push (d->type); ++ } + } + return; + } + ++ /* Get func param it's tree_list. */ + argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) + { +@@ -2857,9 +3855,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + if (argtype) + { + tree argtypet = TREE_VALUE (argtype); +- if (!free_or_realloc ++ /* callee_func (_1, _2); ++ Check the callee func, instead of current func. 
*/ ++ if (!(free_or_realloc ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && safe_functions.contains ( ++ node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +- mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); ++ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void, stmt); + else + record_stmt_expr (arg, node, stmt); + } +@@ -2878,12 +3881,22 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; + if (!get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address)) ++ realpart, imagpart, address, escape_from_base)) + return; + +- if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) +- type->mark_escape (escape_non_optimize, stmt); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_reorder_fields)) ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ else ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ + + /* Record it. 
*/ + type->add_access (new sraccess (stmt, node, type, field)); +@@ -2901,10 +3914,10 @@ ipa_struct_reorg::find_function (cgraph_node *node) + } + + void +-ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, +- vec<srdecl *> &worklist, +- gimple *stmt) ++ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, ++ vec<srdecl *> &worklist, gimple *stmt) + { ++ srtype *type = decl->type; + if (integer_zerop (newdecl)) + return; + +@@ -2916,7 +3929,8 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + type->mark_escape (escape_cast_another_ptr, stmt); + return; + } +- if (d->type == type) ++ if (d->type == type ++ && cmp_ptr_layers (TREE_TYPE (newdecl), TREE_TYPE (decl->decl))) + return; + + srtype *type1 = d->type; +@@ -2967,7 +3981,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + /* Only add to the worklist if the decl is a SSA_NAME. */ + if (TREE_CODE (newdecl) == SSA_NAME) + worklist.safe_push (d); +- if (d->type == type) ++ tree a_decl = d->orig_type ? d->orig_type : TREE_TYPE (newdecl); ++ tree b_decl = decl->orig_type ? decl->orig_type : TREE_TYPE (decl->decl); ++ if (d->type == type && cmp_ptr_layers (a_decl, b_decl)) + return; + + srtype *type1 = d->type; +@@ -3000,6 +4016,96 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + } + } + ++/* Check the definition of gimple assign. */ ++ ++void ++ipa_struct_reorg::check_definition_assign (srdecl *decl, ++ vec<srdecl *> &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); ++ /* a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. 
*/ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num = NULL_TREE; ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, decl, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, decl, worklist, stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ check_type_and_push (rhs2, decl, worklist, stmt); ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* d) if the name is from a cast/assignment, make sure it is used as ++ that type or void* ++ i) If void* then push the ssa_name into worklist. */ ++ gcc_assert (gimple_assign_single_p (stmt)); ++ check_other_side (decl, rhs, stmt, worklist); ++ check_ptr_layers (decl->decl, rhs, stmt); ++} ++ ++/* Check the definition of gimple call. */ ++ ++void ++ipa_struct_reorg::check_definition_call (srdecl *decl, vec<srdecl *> &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_CALL); ++ ++ /* For realloc, check the type of the argument. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!handled_allocation_stmt (stmt)) ++ type->mark_escape (escape_return, stmt); ++ if (!allocate_size (type, decl, stmt)) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++ else ++ { ++ if (!handled_allocation_stmt (stmt) ++ || !allocate_size (type, decl, stmt)) ++ type->mark_escape (escape_return, stmt); ++ } ++ ++ check_alloc_num (stmt, type); ++ return; ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -3029,9 +4135,12 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist) + if (var + && TREE_CODE (var) == PARM_DECL + && VOID_POINTER_P (TREE_TYPE (ssa_name))) +- type->mark_escape (escape_cast_void, NULL); ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + return; + } ++ if (current_mode == STRUCT_REORDER_FIELDS && SSA_NAME_VAR (ssa_name) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); + + /* +@@ -3039,17 +4148,7 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist) + i) Add SSA_NAME (void*) to the worklist if allocated from realloc + */ + if (gimple_code (stmt) == GIMPLE_CALL) +- { +- /* For realloc, check the type of the argument. 
*/ +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) +- check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); +- +- if (!handled_allocation_stmt (stmt) +- || !allocate_size (type, stmt)) +- type->mark_escape (escape_return, stmt); +- check_alloc_num (stmt, type); +- return; +- } ++ check_definition_call (decl, worklist); + /* If the SSA_NAME is sourced from an inline-asm, + just mark the type as escaping. */ + if (gimple_code (stmt) == GIMPLE_ASM) +@@ -3065,58 +4164,11 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl *> &worklist) + { + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) + check_type_and_push (gimple_phi_arg_def (stmt, i), +- type, worklist, stmt); +- return; +- } +- +- gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); +- /* +- a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +- check to make sure the addition was a multiple of the size. +- check the pointer type too. +- */ +- +- tree rhs = gimple_assign_rhs1 (stmt); +- if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- tree num; +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) +- type->mark_escape (escape_non_multiply_size, stmt); +- +- if (TREE_CODE (rhs) == SSA_NAME) +- check_type_and_push (rhs, type, worklist, stmt); +- return; +- } +- +- if (gimple_assign_rhs_code (stmt) == MAX_EXPR +- || gimple_assign_rhs_code (stmt) == MIN_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- if (TREE_CODE (rhs) == SSA_NAME) +- check_type_and_push (rhs, type, worklist, stmt); +- if (TREE_CODE (rhs2) == SSA_NAME) +- check_type_and_push (rhs2, type, worklist, stmt); +- return; +- } +- +- /* Casts between pointers and integer are escaping. 
*/ +- if (gimple_assign_cast_p (stmt)) +- { +- type->mark_escape (escape_cast_int, stmt); ++ decl, worklist, stmt); + return; + } +- +- /* +- d) if the name is from a cast/assignment, make sure it is used as that +- type or void* +- i) If void* then push the ssa_name into worklist +- */ +- gcc_assert (gimple_assign_single_p (stmt)); +- check_other_side (decl, rhs, stmt, worklist); ++ if (gimple_code (stmt) == GIMPLE_ASSIGN) ++ check_definition_assign (decl, worklist); + } + + /* Mark the types used by the inline-asm as escaping. +@@ -3149,45 +4201,121 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + { + srtype *type = decl->type; + +- if (TREE_CODE (other) == SSA_NAME +- || DECL_P (other) ++ if (TREE_CODE (other) == SSA_NAME || DECL_P (other) + || TREE_CODE (other) == INTEGER_CST) + { +- check_type_and_push (other, type, worklist, stmt); ++ check_type_and_push (other, decl, worklist, stmt); ++ return; ++ } ++ ++ tree t = TREE_TYPE (other); ++ if (!handled_type (t)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srtype *t1 = find_type (inner_type (t)); ++ if (t1 == type) ++ { ++ /* In Complete Struct Relayout, if lhs type is the same ++ as rhs type, we could return without any harm. */ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ return; ++ ++ tree base; ++ bool indirect; ++ srtype *type1; ++ srfield *field; ++ bool realpart, imagpart, address; ++ bool escape_from_base = false; ++ if (!get_type_field (other, base, indirect, type1, field, ++ realpart, imagpart, address, escape_from_base)) ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Release INTEGER_TYPE cast to struct pointer. */ ++ bool cast_from_int_ptr = current_function->is_safe_func && base ++ && find_decl (base) == NULL && POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) == INTEGER_TYPE); ++ ++ /* Add a safe func mechanism. 
*/ ++ bool from_void_ptr_parm = current_function->is_safe_func ++ && TREE_CODE (base) == SSA_NAME && is_from_void_ptr_parm (base); ++ ++ /* Release type is used by a type which escapes. */ ++ if (escape_from_base || cast_from_int_ptr || from_void_ptr_parm) ++ return; ++ } ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++ + return; + } + +- tree t = TREE_TYPE (other); +- if (!handled_type (t)) ++ if (t1) ++ t1->mark_escape (escape_cast_another_ptr, stmt); ++ ++ type->mark_escape (escape_cast_another_ptr, stmt); ++} ++ ++ ++/* Get the expr base. */ ++ ++void ++get_base (tree &base, tree expr) ++{ ++ if (TREE_CODE (expr) == MEM_REF) ++ base = TREE_OPERAND (expr, 0); ++ else if (TREE_CODE (expr) == COMPONENT_REF) ++ { ++ base = TREE_OPERAND (expr, 0); ++ base = (TREE_CODE (base) == MEM_REF) ? TREE_OPERAND (base, 0) : base; ++ } ++ else if (TREE_CODE (expr) == ADDR_EXPR) ++ base = TREE_OPERAND (expr, 0); ++} ++ ++/* Check whether the number of pointer layers of exprs is equal, ++ marking unequals as escape. 
*/ ++ ++void ++ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt) ++{ ++ if (current_mode != STRUCT_REORDER_FIELDS || current_function->is_safe_func ++ || !(POINTER_TYPE_P (TREE_TYPE (a_expr))) ++ || !(POINTER_TYPE_P (TREE_TYPE (b_expr))) ++ || !handled_type (TREE_TYPE (a_expr)) ++ || !handled_type (TREE_TYPE (b_expr))) ++ return; ++ ++ tree a_base = a_expr; ++ tree b_base = b_expr; ++ get_base (a_base, a_expr); ++ get_base (b_base, b_expr); ++ ++ srdecl *a = find_decl (a_base); ++ srdecl *b = find_decl (b_base); ++ if (a && b == NULL && TREE_CODE (b_expr) != INTEGER_CST) + { +- type->mark_escape (escape_cast_another_ptr, stmt); ++ a->type->mark_escape (escape_cast_another_ptr, stmt); + return; + } +- +- srtype *t1 = find_type (inner_type (t)); +- if (t1 == type) ++ else if (b && a == NULL && TREE_CODE (a_expr) != INTEGER_CST) + { +- /* In Complete Struct Relayout, if lhs type is the same +- as rhs type, we could return without any harm. */ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT) +- return; +- +- tree base; +- bool indirect; +- srtype *type1; +- srfield *field; +- bool realpart, imagpart, address; +- if (!get_type_field (other, base, indirect, type1, field, +- realpart, imagpart, address)) +- type->mark_escape (escape_cast_another_ptr, stmt); +- ++ b->type->mark_escape (escape_cast_another_ptr, stmt); + return; + } ++ else if (a == NULL && b == NULL) ++ return; + +- if (t1) +- t1->mark_escape (escape_cast_another_ptr, stmt); ++ if (cmp_ptr_layers (TREE_TYPE (a_expr), TREE_TYPE (b_expr))) ++ return; + +- type->mark_escape (escape_cast_another_ptr, stmt); ++ if (a) ++ a->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (b) ++ b->type->mark_escape (escape_cast_another_ptr, stmt); + } + + void +@@ -3205,7 +4333,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + check to make sure they are used correctly. 
*/ + if (gimple_code (stmt) == GIMPLE_PHI) + { +- check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); ++ check_type_and_push (gimple_phi_result (stmt), decl, worklist, stmt); + return; + } + +@@ -3221,10 +4349,15 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -3235,7 +4368,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3254,9 +4387,14 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == 
STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); +@@ -3268,7 +4406,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3282,6 +4420,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + check_other_side (decl, lhs, stmt, worklist); + return; + } ++ check_ptr_layers (lhs, rhs, stmt); + } + + if (is_gimple_assign (stmt) +@@ -3291,9 +4430,26 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree lhs = gimple_assign_lhs (stmt); + tree num; + check_other_side (decl, lhs, stmt, worklist); +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ check_ptr_layers (lhs, decl->decl, stmt); ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) + type->mark_escape (escape_non_multiply_size, stmt); + } ++ ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree other = rhs1 == decl->decl ? 
rhs2 : rhs1; ++ ++ check_other_side (decl, other, stmt, worklist); ++ check_ptr_layers (decl->decl, other, stmt); ++ return; ++ } ++ + } + + /* +@@ -3360,17 +4516,43 @@ ipa_struct_reorg::record_function (cgraph_node *node) + if (DECL_PRESERVE_P (node->decl)) + escapes = escape_marked_as_used; + else if (!node->local) +- escapes = escape_visible_function; ++ { ++ if (current_mode != STRUCT_REORDER_FIELDS) ++ escapes = escape_visible_function; ++ if (current_mode == STRUCT_REORDER_FIELDS && node->externally_visible) ++ escapes = escape_visible_function; ++ } + else if (!node->can_change_signature) + escapes = escape_cannot_change_signature; + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; +- else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) +- escapes = escape_non_optimize; ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_reorder_fields)) ++ escapes = escape_non_optimize; ++ } ++ else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) ++ escapes = escape_non_optimize; ++ } + + basic_block bb; + gimple_stmt_iterator si; + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ current_function->is_safe_func = safe_functions.contains (node); ++ if (dump_file) ++ { ++ fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", ++ node->name (), node->order, ++ current_function->is_safe_func); ++ } ++ } ++ + /* Record the static chain decl. */ + if (fn->static_chain_decl) + { +@@ -3503,6 +4685,42 @@ ipa_struct_reorg::record_function (cgraph_node *node) + return sfn; + } + ++ ++/* For a function that contains the void* parameter and passes the structure ++ pointer, check whether the function uses the input node safely. 
++ For these functions, the void* parameter and related ssa_name are not ++ recorded in record_function (), and the input structure type is not escaped. ++*/ ++ ++void ++ipa_struct_reorg::record_safe_func_with_void_ptr_parm () ++{ ++ cgraph_node *node = NULL; ++ FOR_EACH_FUNCTION (node) ++ { ++ if (!node->real_symbol_p ()) ++ continue; ++ if (node->definition) ++ { ++ if (!node->has_gimple_body_p () || node->inlined_to) ++ continue; ++ node->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (node->decl); ++ if (!fn) ++ continue; ++ push_cfun (fn); ++ if (is_safe_func_with_void_ptr_parm (node)) ++ { ++ safe_functions.add (node); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nfunction %s/%u is safe function.\n", ++ node->name (), node->order); ++ } ++ pop_cfun (); ++ } ++ } ++} ++ + /* Record all accesses for all types including global variables. */ + + void +@@ -3534,6 +4752,10 @@ ipa_struct_reorg::record_accesses (void) + record_var (var->decl, escapes); + } + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ record_safe_func_with_void_ptr_parm (); ++ + FOR_EACH_FUNCTION (cnode) + { + if (!cnode->real_symbol_p ()) +@@ -3552,11 +4774,14 @@ ipa_struct_reorg::record_accesses (void) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (before pruning):\n"); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "======== all types (before pruning): ========\n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (before pruning):\n"); ++ fprintf (dump_file, "======= all functions (before pruning): =======\n"); + dump_functions (dump_file); + } ++ /* If record_var () is called later, new types will not be recorded. 
*/ + done_recording = true; + } + +@@ -3580,6 +4805,7 @@ ipa_struct_reorg::walk_field_for_cycles (srtype *type) + { + if (!field->type) + ; ++ /* If there are two members of the same structure pointer type? */ + else if (field->type->visited + || walk_field_for_cycles (field->type)) + { +@@ -3658,22 +4884,113 @@ ipa_struct_reorg::propagate_escape (void) + } while (changed); + } + ++/* If the original type (with members) has escaped, corresponding to the ++ struct pointer type (empty member) in the structure fields ++ should also marked as escape. */ ++ ++void ++ipa_struct_reorg::propagate_escape_via_original (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ for (unsigned j = 0; j < types.length (); j++) ++ { ++ const char *type1 = get_type_name (typesi->type); ++ const char *type2 = get_type_name (typesj->type); ++ if (type1 == NULL || type2 == NULL) ++ continue; ++ if (type1 == type2 && typesj->has_escaped ()) ++ { ++ if (!typesi->has_escaped ()) ++ typesi->mark_escape (escape_via_orig_escape, NULL); ++ break; ++ } ++ } ++ } ++} ++ ++/* Marks the fileds as empty and does not have the original structure type ++ is escape. */ ++ ++void ++ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (typesi->fields.length () == 0) ++ { ++ for (unsigned j = 0; j < types.length (); j++) ++ { ++ if (i != j && typesj->fields.length ()) ++ { ++ const char *type1 = get_type_name (typesi->type); ++ const char *type2 = get_type_name (typesj->type); ++ if (type1 != NULL && type2 != NULL && type1 == type2) ++ break; ++ } ++ if (j == types.length () - 1) ++ typesi->mark_escape (escape_via_empty_no_orig, NULL); ++ } ++ } ++ } ++} ++ ++/* Escape propagation is performed on types that escape through external ++ functions. 
*/ ++ ++void ++ipa_struct_reorg::propagate_escape_via_ext_func_types (void) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n propagate_escape_via_ext_func_types: \n\n"); ++ unsigned i = 0; ++ hash_set<srtype *> visited_types; ++ while (i < ext_func_types.length ()) ++ { ++ visited_types.add (ext_func_typesi); ++ unsigned j = 0; ++ srfield * field; ++ FOR_EACH_VEC_ELT (ext_func_typesi->fields, j, field) ++ { ++ if (field->type) ++ { ++ if (!field->type->has_escaped ()) ++ field->type->mark_escape (escape_dependent_type_escapes, NULL); ++ if (!visited_types.contains (field->type)) ++ ext_func_types.safe_push (field->type); ++ } ++ } ++ i++; ++ } ++} ++ + /* Prune the escaped types and their decls from what was recorded. */ + + void + ipa_struct_reorg::prune_escaped_types (void) + { +- if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT ++ && current_mode != STRUCT_REORDER_FIELDS) + { ++ /* Detect recusive types and mark them as escaping. */ + detect_cycles (); ++ /* If contains or is contained by the escape type, ++ mark them as escaping. */ + propagate_escape (); + } ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ propagate_escape_via_original (); ++ propagate_escape_via_empty_with_no_original (); ++ propagate_escape_via_ext_func_types (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after prop but before pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "all types (after prop but before pruning): \n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after prop but before pruning):\n"); ++ fprintf (dump_file, "all functions (after prop but before pruning): \n"); + dump_functions (dump_file); + } + +@@ -3721,7 +5038,8 @@ ipa_struct_reorg::prune_escaped_types (void) + /* Prune functions which don't refer to any variables any more. 
*/ + if (function->args.is_empty () + && function->decls.is_empty () +- && function->globals.is_empty ()) ++ && function->globals.is_empty () ++ && current_mode != STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); +@@ -3746,24 +5064,31 @@ ipa_struct_reorg::prune_escaped_types (void) + + /* Prune types that escape, all references to those types + will have been removed in the above loops. */ +- for (unsigned i = 0; i < types.length ();) ++ /* The escape type is not deleted in STRUCT_REORDER_FIELDS, ++ Then the type that contains the escaped type fields ++ can find complete information. */ ++ if (current_mode != STRUCT_REORDER_FIELDS) + { +- srtype *type = typesi; +- if (type->has_escaped ()) ++ for (unsigned i = 0; i < types.length ();) + { +- /* All references to this type should have been removed now. */ +- delete type; +- types.ordered_remove (i); ++ srtype *type = typesi; ++ if (type->has_escaped ()) ++ { ++ /* All references to this type should have been removed now. 
*/ ++ delete type; ++ types.ordered_remove (i); ++ } ++ else ++ i++; + } +- else +- i++; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "========= all types (after pruning): =========\n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after pruning):\n"); ++ fprintf (dump_file, "======== all functions (after pruning): ========\n"); + dump_functions (dump_file); + } + } +@@ -3790,6 +5115,26 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += typesi->create_new_type (); + ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ auto_vec <tree> *fields = fields_to_finish.get (typesi->type); ++ if (fields) ++ { ++ for (unsigned j = 0; j < fields->length (); j++) ++ { ++ tree field = (*fields)j; ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (field), ++ typesi->newtype0); ++ } ++ } ++ } ++ for (unsigned i = 0; i < types.length (); i++) ++ layout_type (typesi->newtype0); ++ } ++ + if (dump_file) + { + if (newtypes) +@@ -3894,7 +5239,8 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node) + char *name = NULL; + if (tname) + { +- name = concat (tname, ".reorg.0", NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder.0" : ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -3980,9 +5326,10 @@ ipa_struct_reorg::create_new_functions (void) + fprintf (dump_file, "\n"); + } + statistics_counter_event (NULL, "Create new function", 1); +- new_node = node->create_version_clone_with_body (vNULL, NULL, +- NULL, NULL, NULL, +- "struct_reorg"); ++ new_node = node->create_version_clone_with_body ( ++ vNULL, NULL, NULL, NULL, NULL, ++ current_mode == STRUCT_REORDER_FIELDS ++ ? 
"struct_reorder" : "struct_reorg"); + new_node->can_change_signature = node->can_change_signature; + new_node->make_local (); + f->newnode = new_node; +@@ -4026,6 +5373,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, + srfield *f; + bool realpart, imagpart; + bool address; ++ bool escape_from_base = false; + + tree newbasemax_split; + memset (newexpr, 0, sizeof (treemax_split)); +@@ -4043,8 +5391,8 @@ ipa_struct_reorg::rewrite_expr (tree expr, + return true; + } + +- if (!get_type_field (expr, base, indirect, t, f, +- realpart, imagpart, address)) ++ if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) + return false; + + /* If the type is not changed, then just return false. */ +@@ -4107,7 +5455,38 @@ ipa_struct_reorg::rewrite_expr (tree expr, + if (address) + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) +- newbase1 = build_simple_mem_ref (newbase1); ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Supports the MEM_REF offset. ++ _1 = MEM(struct arc *)ap_1 + 72B.flow; ++ Old rewrite: _1 = ap.reorder.0_8->flow; ++ New rewrite: _1 ++ = MEM(struct arc.reorder.0 *)ap.reorder.0_8 + 64B.flow; ++ */ ++ HOST_WIDE_INT offset_tmp = 0; ++ HOST_WIDE_INT mem_offset = 0; ++ bool realpart_tmp = false; ++ bool imagpart_tmp = false; ++ tree accesstype_tmp = NULL_TREE; ++ tree num = NULL_TREE; ++ get_ref_base_and_offset (expr, offset_tmp, ++ realpart_tmp, imagpart_tmp, ++ accesstype_tmp, &num); ++ ++ tree ptype = TREE_TYPE (newbase1); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree size = isptrptr (ptype) ? TYPE_SIZE_UNIT (ptype) : ++ TYPE_SIZE_UNIT (inner_type (ptype)); ++ mem_offset = (num != NULL) ++ ? 
TREE_INT_CST_LOW (num) * tree_to_shwi (size) ++ : 0; ++ newbase1 = build2 (MEM_REF, TREE_TYPE (ptype), newbase1, ++ build_int_cst (ptype, mem_offset)); ++ } ++ else ++ newbase1 = build_simple_mem_ref (newbase1); ++ } + newexpri = build3 (COMPONENT_REF, TREE_TYPE (f->newfieldi), + newbase1, f->newfieldi, NULL_TREE); + if (imagpart) +@@ -4151,8 +5530,12 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + +- if (gimple_assign_rhs_code (stmt) == EQ_EXPR +- || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) ++ == tcc_comparison))) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); +@@ -4160,6 +5543,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newrhs2max_split; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) ++ code = rhs_code; ++ + if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) + return false; + tree newexpr = NULL_TREE; +@@ -4201,20 +5588,78 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + internal_error ( + "The rhs of pointer is not a multiplicate and it slips through"); + +- num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ /* Add the judgment of num, support for POINTER_DIFF_EXPR. ++ _6 = _4 + _5; ++ _5 = (long unsigned int) _3; ++ _3 = _1 - old_2. 
*/ ++ if (current_mode != STRUCT_REORDER_FIELDS ++ || (current_mode == STRUCT_REORDER_FIELDS && (num != NULL))) ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); + for (unsigned i = 0; i < max_split && newlhsi; i++) + { + gimple *new_stmt; + +- tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhsi))); +- newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); +- new_stmt = gimple_build_assign (newlhsi, POINTER_PLUS_EXPR, +- newrhsi, newsize); ++ if (num != NULL) ++ { ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhsi))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, ++ newsize); ++ new_stmt = gimple_build_assign (newlhsi, POINTER_PLUS_EXPR, ++ newrhsi, newsize); ++ } ++ else ++ new_stmt = gimple_build_assign (newlhsi, POINTER_PLUS_EXPR, ++ newrhsi, rhs2); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + remove = true; + } + return remove; + } ++ ++ /* Support POINTER_DIFF_EXPR rewriting. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1max_split; ++ tree newrhs2max_split; ++ ++ bool r1 = rewrite_expr (rhs1, newrhs1); ++ bool r2 = rewrite_expr (rhs2, newrhs2); ++ ++ if (r1 != r2) ++ { ++ /* Handle NULL pointer specially. */ ++ if (r1 && !r2 && integer_zerop (rhs2)) ++ { ++ r2 = true; ++ for (unsigned i = 0; i < max_split && newrhs1i; i++) ++ newrhs2i = fold_convert (TREE_TYPE (newrhs1i), rhs2); ++ } ++ else if (r2 && !r1 && integer_zerop (rhs1)) ++ { ++ r1 = true; ++ for (unsigned i = 0; i < max_split && newrhs2i; i++) ++ newrhs1i = fold_convert (TREE_TYPE (newrhs2i), rhs1); ++ } ++ else ++ return false; ++ } ++ else if (!r1 && !r2) ++ return false; ++ ++ /* The two operands always have pointer/reference type. 
*/ ++ for (unsigned i = 0; i < max_split && newrhs1i && newrhs2i; i++) ++ { ++ gimple_assign_set_rhs1 (stmt, newrhs1i); ++ gimple_assign_set_rhs2 (stmt, newrhs2i); ++ update_stmt (stmt); ++ } ++ remove = false; ++ return remove; ++ } ++ + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) + { + tree lhs = gimple_assign_lhs (stmt); +@@ -4222,9 +5667,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "rewriting statement:\n"); ++ fprintf (dump_file, "\nrewriting stamtenet:\n"); + print_gimple_stmt (dump_file, stmt, 0); +- fprintf (dump_file, "\n"); + } + tree newlhsmax_split; + tree newrhsmax_split; +@@ -4271,7 +5715,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + if (!decl || !decl->type) + return false; + srtype *type = decl->type; +- tree num = allocate_size (type, stmt); ++ tree num = allocate_size (type, decl, stmt); + gcc_assert (num); + memset (newrhs1, 0, sizeof (newrhs1)); + +@@ -4291,7 +5735,10 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + /* Go through each new lhs. */ + for (unsigned i = 0; i < max_split && decl->newdecli; i++) + { +- tree newsize = TYPE_SIZE_UNIT (type->type); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree newsize = isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->newtypei); + gimple *g; + /* Every allocation except for calloc needs + the size multiplied out. */ +@@ -4352,6 +5799,23 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + gcc_assert (node); + srfunction *f = find_function (node); + ++ /* Add a safe func mechanism. 
*/ ++ if (current_mode == STRUCT_REORDER_FIELDS && f && f->is_safe_func) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexprmax_split; ++ if (!rewrite_expr (expr, newexpr)) ++ return false; ++ ++ if (newexpr1 == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr0); ++ update_stmt (stmt); ++ return false; ++ } ++ return false; ++ } ++ + /* Did not find the function or had not cloned it return saying don't + change the function call. */ + if (!f || !f->newf) +@@ -4437,7 +5901,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) + SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; + +- gsi_replace (gsi, new_stmt, false); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + /* We need to defer cleaning EH info on the new statement to + fixup-cfg. We may not have dominator information at this point +@@ -4450,7 +5914,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + add_stmt_to_eh_lp (new_stmt, lp_nr); + } + +- return false; ++ return true; + } + + /* Rewrite the conditional statement STMT. Return TRUE if the +@@ -4462,50 +5926,52 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. 
*/ +- if (rhs_code != EQ_EXPR +- && rhs_code != NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; +- tree rhs1 = gimple_cond_lhs (stmt); +- tree rhs2 = gimple_cond_rhs (stmt); ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "COND: Rewriting\n"); ++ fprintf (dump_file, "\nCOND: Rewriting\n"); + print_gimple_stmt (dump_file, stmt, 0); ++ print_generic_expr (dump_file, lhs); + fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs1); +- fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs2); ++ print_generic_expr (dump_file, rhs); + fprintf (dump_file, "\n"); + } + +- tree newrhs1max_split; +- tree newrhs2max_split; +- tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; +- if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ tree newlhsmax_split = {}; ++ tree newrhsmax_split = {}; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "\nDid nothing to statement.\n"); ++ fprintf (dump_file, "Did nothing to statement.\n"); + return false; + } + +- tree newexpr = NULL_TREE; +- for (unsigned i = 0; i < max_split && newrhs1i; i++) ++ /* Old rewrite: if (x_1 != 0B) ++ -> _1 = x.reorder.0_1 != 0B; if (_1 != 1) ++ The logic is incorrect. 
++ New rewrite: if (x_1 != 0B) ++ -> if (x.reorder.0_1 != 0B); */ ++ for (unsigned i = 0; i < max_split && (newlhsi || newrhsi); i++) + { +- tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, +- newrhs1i, newrhs2i); +- if (!newexpr) +- newexpr = expr; +- else +- newexpr = gimplify_build2 (gsi, code, boolean_type_node, +- newexpr, expr); +- } +- +- if (newexpr) +- { +- gimple_cond_set_lhs (stmt, newexpr); +- gimple_cond_set_rhs (stmt, boolean_true_node); ++ if (newlhsi) ++ gimple_cond_set_lhs (stmt, newlhsi); ++ if (newrhsi) ++ gimple_cond_set_rhs (stmt, newrhsi); + update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "replaced with:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } + } + return false; + } +@@ -4516,6 +5982,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ /* Delete debug gimple now. */ ++ return true; + bool remove = false; + if (gimple_debug_bind_p (stmt)) + { +@@ -4568,7 +6037,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nrewriting PHI:"); ++ fprintf (dump_file, "\nrewriting PHI:\n"); + print_gimple_stmt (dump_file, phi, 0); + } + +@@ -4579,7 +6048,15 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + { + tree newrhsmax_split; + phi_arg_d rhs = *gimple_phi_arg (phi, i); +- rewrite_expr (rhs.def, newrhs); ++ ++ /* Handling the NULL phi Node. 
*/ ++ bool r = rewrite_expr (rhs.def, newrhs); ++ if (!r && integer_zerop (rhs.def)) ++ { ++ for (unsigned i = 0; i < max_split && newlhsi; i++) ++ newrhsi = fold_convert (TREE_TYPE (newlhsi), rhs.def); ++ } ++ + for (unsigned j = 0; j < max_split && newlhsj; j++) + { + SET_PHI_ARG_DEF (newphij, i, newrhsj); +@@ -4590,7 +6067,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\ninto\n:"); ++ fprintf (dump_file, "into:\n"); + for (unsigned i = 0; i < max_split && newlhsi; i++) + { + print_gimple_stmt (dump_file, newphii, 0); +@@ -4663,12 +6140,59 @@ ipa_struct_reorg::rewrite_functions (void) + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +- return 0; ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functionsi; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNo rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } ++ } ++ return 0; ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS && dump_file) ++ { ++ fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); ++ dump_newtypes (dump_file); ++ } + + if (functions.length ()) + { + retval = TODO_remove_functions; + create_new_functions (); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ prune_escaped_types (); ++ } ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functionsi; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "==== Before create decls: %dth_%s ====\n\n", ++ i, f->node->name 
()); ++ if (current_function_decl) ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } + } + + create_new_decls (); +@@ -4691,9 +6215,12 @@ ipa_struct_reorg::rewrite_functions (void) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nBefore rewrite:\n"); ++ fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", ++ i, f->node->name ()); + dump_function_to_file (current_function_decl, dump_file, + dump_flags | TDF_VOPS); ++ fprintf (dump_file, "\n======== Start to rewrite: %dth_%s ========\n", ++ i, f->node->name ()); + } + FOR_EACH_BB_FN (bb, cfun) + { +@@ -4761,9 +6288,10 @@ ipa_struct_reorg::rewrite_functions (void) + + free_dominance_info (CDI_DOMINATORS); + +- if (dump_file && (dump_flags & TDF_DETAILS)) ++ if (dump_file) + { +- fprintf (dump_file, "\nAfter rewrite:\n"); ++ fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", ++ i, f->node->name ()); + dump_function_to_file (current_function_decl, dump_file, + dump_flags | TDF_VOPS); + } +@@ -4809,16 +6337,21 @@ ipa_struct_reorg::execute (enum srmode mode) + { + unsigned int ret = 0; + +- if (mode == NORMAL) ++ if (dump_file) ++ fprintf (dump_file, "\n\n====== ipa_struct_reorg level %d ======\n\n", ++ mode); ++ ++ if (mode == NORMAL || mode == STRUCT_REORDER_FIELDS) + { +- current_mode = NORMAL; +- /* FIXME: If there is a top-level inline-asm, ++ current_mode = mode; ++ /* If there is a top-level inline-asm, + the pass immediately returns. */ + if (symtab->first_asm_symbol ()) + return 0; + record_accesses (); + prune_escaped_types (); +- analyze_types (); ++ if (current_mode == NORMAL) ++ analyze_types (); + + ret = rewrite_functions (); + } +@@ -4881,7 +6414,55 @@ pass_ipa_struct_reorg::gate (function *) + && flag_lto_partition == LTO_PARTITION_ONE + /* Only enable struct optimizations in C since other + languages' grammar forbid. 
*/ +- && lang_c_p ()); ++ && lang_c_p () ++ /* Only enable struct optimizations in lto or whole_program. */ ++ && (in_lto_p || flag_whole_program)); ++} ++ ++const pass_data pass_data_ipa_reorder_fields = ++{ ++ SIMPLE_IPA_PASS, // type ++ "reorder_fields", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_IPA_REORDER_FIELDS, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ 0, // todo_flags_finish ++}; ++ ++class pass_ipa_reorder_fields : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_reorder_fields (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_reorder_fields, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) ++ { ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); ++ return ret; ++ } ++ ++}; // class pass_ipa_reorder_fields ++ ++bool ++pass_ipa_reorder_fields::gate (function *) ++{ ++ return (optimize >= 3 ++ && flag_ipa_reorder_fields ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE ++ /* Only enable struct optimizations in C since other ++ languages' grammar forbid. */ ++ && lang_c_p () ++ /* Only enable struct optimizations in lto or whole_program. 
*/ ++ && (in_lto_p || flag_whole_program)); + } + + } // anon namespace +@@ -4891,4 +6472,10 @@ simple_ipa_opt_pass * + make_pass_ipa_struct_reorg (gcc::context *ctxt) + { + return new pass_ipa_struct_reorg (ctxt); +-} +\ No newline at end of file ++} ++ ++simple_ipa_opt_pass * ++make_pass_ipa_reorder_fields (gcc::context *ctxt) ++{ ++ return new pass_ipa_reorder_fields (ctxt); ++} +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index ef7f4c780..6f85adeb4 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -68,12 +68,14 @@ struct srfunction + auto_vec<srdecl *> args; + auto_vec<srdecl *> globals; + auto_vec_del<srdecl> decls; +- srdecl *record_decl (srtype *, tree, int arg); ++ srdecl *record_decl (srtype *, tree, int arg, tree orig_type = NULL); + + srfunction *old; + cgraph_node *newnode; + srfunction *newf; + ++ bool is_safe_func; ++ + // Constructors + srfunction (cgraph_node *n); + +@@ -184,6 +186,11 @@ struct srfield + void create_new_fields (tree newtypemax_split, + tree newfieldsmax_split, + tree newlastmax_split); ++ void reorder_fields (tree newfieldsmax_split, tree newlastmax_split, ++ tree &field); ++ void create_new_reorder_fields (tree newtypemax_split, ++ tree newfieldsmax_split, ++ tree newlastmax_split); + }; + + struct sraccess +@@ -221,8 +228,11 @@ struct srdecl + + tree newdeclmax_split; + ++ /* Auxiliary record complete original type information of the void* type. */ ++ tree orig_type; ++ + // Constructors +- srdecl (srtype *type, tree decl, int argumentnum = -1); ++ srdecl (srtype *type, tree decl, int argumentnum = -1, tree orgtype = NULL); + + // Methods + void dump (FILE *file); +diff --git a/gcc/passes.def b/gcc/passes.def +index 9692066e4..bdc835b87 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -178,6 +178,7 @@ along with GCC; see the file COPYING3. If not see + compiled unit. 
*/ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_ipa_pta); ++ NEXT_PASS (pass_ipa_reorder_fields); + /* FIXME: this should be a normal IP pass. */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index 3fe64047c..6fa529eee 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -105,7 +105,7 @@ protected: + { + /* In structure optimizatons, we call new to ensure that + the allocated memory is initialized to 0. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () + : new T (); + +@@ -122,7 +122,7 @@ protected: + ggc_delete (item); + else + { +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + delete item; + else + m_allocator.remove (item); +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +new file mode 100644 +index 000000000..b95be2dab +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +@@ -0,0 +1,75 @@ ++// escape_instance_field, "Type escapes via a field of instance". 
++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++ node_t node; ++} network_t; ++ ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++ node_t node; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array not handled yet". */ ++network_t* net2; ++ ++int ++main () ++{ ++ net0 = (network_t*) calloc (1, sizeof(network_t)); ++ net0->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ /* Contains an escape type and has structure instance field. */ ++ net0->arcs->node = net0->node; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +new file mode 100644 +index 000000000..3d243313b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +@@ -0,0 +1,94 @@ ++// Verify in escape_dependent_type_escapes, ++// the multi-layer dereference is rewriting correctly,and the memory access ++// is correct. 
++ ++// release ++// escape_dependent_type_escapes, ++// "Type uses a type which escapes or is used by a type which escapes" ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array not handled yet". 
*/ ++network_t* net2; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net0 = (network_t*) calloc (1, sizeof(network_t)); ++ net0->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net0->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net0->arcs->id = net0->arcs->id + 2; ++ stop_arcs->cost = net0->arcs->id / 2; ++ stop_arcs->net_add = net0; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net0->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net1 != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +new file mode 100644 +index 000000000..faaf1e3a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +@@ -0,0 +1,24 @@ ++/* check_ptr_layers bugfix.*/ ++/* { dg-do compile } */ ++struct { ++ char a; ++} **b = 0, *e = 0; ++long c; ++char d = 9; ++int f; ++ ++void g() ++{ ++ for (; f;) ++ if (c) ++ (*e).a++; ++ if (!d) ++ for (;;) ++ b &&c; ++} ++int ++main() ++{ ++ g(); ++} ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +new file mode 100644 +index 000000000..886706ae9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +@@ -0,0 +1,82 @@ ++// bugfix: ++// Common members do not need to reconstruct. ++// Otherwise, eg:int* -> int** and void* -> void**. 
++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <assert.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t* cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t** org_cost; ++}; ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int ++f(int i, int j) ++{ ++ struct a *t = NULL; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++arc_t **ap = NULL; ++const int MAX = 100; ++ ++int ++main() ++{ ++ if (f(1, 2) != 3) ++ { ++ abort (); ++ } ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)0.id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +new file mode 100644 +index 000000000..f3785f392 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++#define MallocOrDie(x) sre_malloc((x)) ++ ++struct gki_elem { ++ char *key; ++ int idx; ++ struct gki_elem *nxt; ++}; ++ ++typedef struct { ++ struct gki_elem **table; ++ ++ int primelevel; ++ int nhash; ++ int nkeys; ++} GKI; ++ ++void ++Die(char *format, ...) 
++{ ++ exit(1); ++} ++ ++void * ++sre_malloc(size_t size) ++{ ++ void *ptr; ++ ++ if ((ptr = malloc (size)) == NULL) ++ { ++ Die("malloc of %ld bytes failed", size); ++ } ++ return ptr; ++} ++ ++ ++__attribute__((noinline)) int ++GKIStoreKey(GKI *hash, char *key) ++{ ++ hash->table0 = MallocOrDie(sizeof(struct gki_elem)); ++} ++ ++int ++main () ++{ ++ GKI *hash; ++ char *key; ++ GKIStoreKey(hash, key); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +new file mode 100644 +index 000000000..1415d759a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +@@ -0,0 +1,60 @@ ++// verify newarccmp-1.flow ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap0.id); ++ for (int i = 1; i < MAX; i++) ++ { ++ api-1.id = 500; ++ } ++ printf("%d\n", ap0.id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +new file mode 100644 
+index 000000000..003da0b57 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +@@ -0,0 +1,83 @@ ++// release type is used by a type which escapes. ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++arc_p stop_arcs = NULL; ++int cnt = 0; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ if(!(net->arcs)) ++ { ++ return -1; ++ } ++ ++ for( int i = 0; i < MAX; i++, net->arcs = stop_arcs) ++ { ++ cnt++; ++ } ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +new file mode 100644 +index 000000000..84a34f241 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +@@ -0,0 +1,69 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-shared" } */ ++ ++#include 
<stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ int x; ++ arc_p arcs, sorted_arcs; ++ node_p nodes, stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++extern int bcf_sr_add_reader (network_t *); ++extern int bcf_hdr_dup (arc_p); ++ ++int ++test () ++{ ++ network_t *net = (network_t *) calloc (1, 20); ++ ++ if (!bcf_sr_add_reader(net)) ++ printf("error"); ++ arc_p arc = net->nodes->basic_arc; ++ if(!bcf_hdr_dup(arc)) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +new file mode 100644 +index 000000000..10dcf098c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +@@ -0,0 +1,72 @@ ++// release escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++typedef int cmp_t(const void *, const void *); ++ ++__attribute__((noinline)) void ++spec_qsort(void *a, cmp_t *cmp) ++{ ++ char *pb = NULL; ++ while (cmp(pb, a)) ++ { ++ pb += 1; ++ } ++} ++ ++static int arc_compare( arc_t **a1, int a2 ) ++{ ++ if( (*a1)->id < a2 ) ++ { ++ return -1; ++ } ++ return 1; ++} ++ ++int ++main() ++{ ++ spec_qsort(NULL, (int (*)(const void *, const void *))arc_compare); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +new file mode 100644 +index 000000000..8d1a9a114 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. 
++ _1 = MEM(struct arc *)ap_4 + 72B.flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM(struct arc.reorder.0 *)ap.reorder.0_8 + 64B.flow. */ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. */ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap1.flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +new file mode 100644 +index 000000000..23765fc56 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct T_HASH_ENTRY ++{ ++ unsigned int hash; ++ unsigned int klen; ++ char *key; ++} iHashEntry; ++ ++typedef struct T_HASH ++{ ++ unsigned int size; ++ unsigned int fill; ++ unsigned int keys; ++ ++ iHashEntry **array; ++} uHash; ++ ++uHash *retval; ++ ++int ++main() { ++ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +new file mode 100644 +index 000000000..54e737ee8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +@@ -0,0 +1,109 @@ ++// For testing: ++/* ++Compile options: gcc -O3 -g ++-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg ++-v -save-temps -fdump-ipa-all-details test.c -o test ++ ++in COMPLETE_STRUCT_RELAYOUT pass: ++N type: struct node.reorder.0 new = "Type escapes a cast to a different pointer" ++copy$head_26 = test_arc.reorder.0_49->head; ++ ++type : struct arc.reorder.0(1599) { ++fields = { ++field (5382) {type = cost_t} ++field (5383) {type = struct node.reorder.0 *} // but node has escaped. ++field (5384) {type = struct node.reorder.0 *} ++field (5386) {type = struct arc.reorder.0 *} ++field (5387) {type = struct arc.reorder.0 *} ++field (5388) {type = flow_t} ++field (5389) {type = cost_t} ++field (5381) {type = int} ++field (5385) {type = short int} ++} ++ ++// The types of the two types are inconsistent after the rewriting. 
++newarc_2(D)->tail = tail_1(D); ++vs ++struct_reorder.0_61(D)->tail = tail_1(D); ++*/ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) void ++replace_weaker_arc( arc_t *newarc, node_t *tail, node_t *head) ++{ ++ printf("test"); ++} ++ ++__attribute__((noinline)) int64_t ++switch_arcs(arc_t** deleted_arcs, arc_t* arcnew) ++{ ++ int64_t count = 0; ++ arc_t *test_arc, copy; ++ ++ if (!test_arc->ident) ++ { ++ copy = *test_arc; ++ count++; ++ *test_arc = arcnew0; ++ replace_weaker_arc(arcnew, NULL, NULL); ++ } ++ return count; ++} ++ ++int ++main () ++{ ++ switch_arcs(NULL, NULL); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +new file mode 100644 +index 000000000..2ae46fb31 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +@@ -0,0 +1,87 @@ ++// escape_cast_void, "Type escapes a cast to/from void*" ++// stop_393 = net.stop_nodes; void *stop; ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef 
struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs, sorted_arcs; ++ int x; ++ node_p nodes, stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++int cnt = 0; ++ ++__attribute__((noinline)) int ++primal_feasible (network_t *net) ++{ ++ void* stop; ++ node_t *node; ++ ++ node = net->nodes; ++ stop = (void *)net->stop_nodes; ++ for( node++; node < (node_t *)stop; node++ ) ++ { ++ printf( "PRIMAL NETWORK SIMPLEX: " ); ++ } ++ return 0; ++} ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->nodes = calloc (MAX, sizeof (node_t)); ++ net->stop_nodes = calloc (MAX, sizeof (node_t)); ++ cnt = primal_feasible( net ); ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return cnt; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +new file mode 100644 +index 000000000..3a3c10b70 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +@@ -0,0 +1,71 @@ ++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid ++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ arc_t *old_arcs; ++ node_t *node; ++ node_t *stop; ++ size_t off; ++ network_t* net; ++ ++ for( ; node->number < stop->number; node++ ) ++ { ++ off = node->basic_arc - old_arcs; ++ node->basic_arc = (arc_t *)(net->arcs + off); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +new file mode 100644 +index 000000000..7b7d110df +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +@@ -0,0 +1,55 @@ ++// support NEGATE_EXPR rewriting ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; 
++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ int64_t susp = 0; ++ const int MAX = 100; ++ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ ap -= susp; ++ printf("%d\n", ap1.flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +new file mode 100644 +index 000000000..317aafa5f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++struct node ++{ ++ struct node *left, *right; ++ double a, b, c, d, e, f; ++} ++*a; ++int b, c; ++void ++CreateNode (struct node **p1) ++{ ++ *p1 = calloc (10, sizeof (struct node)); ++} ++ ++int ++main () ++{ ++ a->left = 0; ++ struct node *t = a; ++ CreateNode (&t->right); ++ ++ struct node p = *a; ++ b = 1; ++ if (p.left) ++ b = 0; ++ if (b) ++ printf (" Tree.\n"); ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +new file mode 100644 +index 000000000..01a33f669 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +@@ -0,0 +1,55 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet"; ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_t **ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)0.id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +new file mode 100644 +index 000000000..a38556533 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +@@ -0,0 +1,58 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet" ++ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p 
firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p **ap; ++ ++ ++int ++main () ++{ ++ ap = (arc_p**) calloc(MAX, sizeof(arc_p*)); ++ (**ap)0.id = 500; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +new file mode 100644 +index 000000000..5c17ee528 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +@@ -0,0 +1,57 @@ ++// release escape_rescusive_type, "Recusive type" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ap0.id = 100; ++ ap0.head = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +new file 
mode 100644 +index 000000000..710517ee9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +@@ -0,0 +1,65 @@ ++// support more gimple assign rhs code ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) int ++compare(arc_p p1, arc_p p2) ++{ ++ return p1 < p2; ++} ++ ++int n = 0; ++int m = 0; ++ ++int ++main () ++{ ++ scanf ("%d %d", &n, &m); ++ arc_p p = calloc (10, sizeof (struct arc)); ++ if (compare (&pn, &pm)) ++ { ++ printf ("ss!"); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +new file mode 100644 +index 000000000..6ed0a5d2d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +@@ -0,0 +1,72 @@ ++// rewrite_cond bugfix; ++/* ++if (iterator_600 != 0B) ++old rewrite: _1369 = iterator.reorder.0_1249 != 0B; if (_1369 != 1) ++new rewrite: if (iterator.reorder.0_1249 != 0B) ++*/ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct list_elem ++{ ++ arc_t* arc; ++ struct list_elem* next; ++}list_elem; ++ ++struct node ++{ ++ 
int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int i = 0; ++ ++int ++main () ++{ ++ register list_elem *first_list_elem; ++ register list_elem* iterator; ++ iterator = first_list_elem->next; ++ while (iterator) ++ { ++ iterator = iterator->next; ++ i++; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +new file mode 100644 +index 000000000..5a2dd964f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +@@ -0,0 +1,58 @@ ++// support if (_150 >= _154) ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main() ++{ ++ arc_p **ap = (arc_p**) malloc(1 * sizeof(arc_p*)); ++ arc_p **arcs_pointer_sorted = (arc_p**) malloc(1 * sizeof(arc_p*)); ++ arcs_pointer_sorted0 = (arc_p*) calloc (1, sizeof(arc_p)); ++ ++ if (arcs_pointer_sorted >= ap) 
++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +new file mode 100644 +index 000000000..faa90b42d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +@@ -0,0 +1,81 @@ ++/* ++Exclude the rewriting error caused by ++first_list_elem = (list_elem *)NULL; ++rewriting PHI:first_list_elem_700 = PHI <0B(144), 0B(146)> ++into: ++first_list_elem.reorder.0_55 = PHI <(144), (146)> ++*/ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct list_elem ++{ ++ arc_t* arc; ++ struct list_elem* next; ++}list_elem; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout, firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail, head; ++ short ident; ++ arc_p nextout, nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++ ++list_elem* new_list_elem; ++list_elem* first_list_elem; ++ ++int ++main () ++{ ++ int i = 0; ++ list_elem *first_list_elem; ++ list_elem *new_list_elem; ++ arc_t *arcout; ++ for( ; i < MAX && arcout->ident == -1; i++); ++ ++ first_list_elem = (list_elem *)NULL; ++ for( ; i < MAX; i++) ++ { ++ new_list_elem = (list_elem*) calloc(1, sizeof(list_elem)); ++ new_list_elem->next = first_list_elem; ++ first_list_elem = new_list_elem; ++ } ++ if (first_list_elem != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline 
at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_shwi.c b/gcc/testsuite/gcc.dg/struct/rf_shwi.c +new file mode 100644 +index 000000000..2bb326ff2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_shwi.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++ ++struct foo {int dx; long dy; int dz; }; ++struct goo {long offset; struct foo* pfoo; }; ++ ++void* func (long); ++ ++__attribute__((used)) static void ++test(struct goo* g) ++{ ++ void* pvoid; ++ struct foo* f; ++ ++ for (f = g->pfoo; f->dx; f++) ++ { ++ if (f->dy) ++ break; ++ } ++ f--; ++ ++ pvoid = func(f->dz + g->offset); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +new file mode 100644 +index 000000000..8f2da99cc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +@@ -0,0 +1,92 @@ ++// release escape_visible_function, "Type escapes via expternally visible function call" ++// compile options: gcc -O3 -fno-inline -fwhole-program ++// -flto-partition=one -fipa-struct-reorg arc_compare.c -fdump-ipa-all -S -v ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) static int ++arc_compare( arc_t **a1, arc_t **a2 ) ++{ ++ if( (*a1)->flow > (*a2)->flow ) ++ { ++ return 1; ++ } ++ if( (*a1)->flow < (*a2)->flow ) ++ { ++ return -1; ++ } ++ if( (*a1)->id < (*a2)->id ) ++ { ++ return -1; ++ } ++ ++ 
return 1; ++} ++ ++__attribute__((noinline)) void ++spec_qsort(void *array, int nitems, int size, ++ int (*cmp)(const void*,const void*)) ++{ ++ for (int i = 0; i < nitems - 1; i++) ++ { ++ if (cmp (array , array)) ++ { ++ printf ("CMP 1\n"); ++ } ++ else ++ { ++ printf ("CMP 2\n"); ++ } ++ } ++} ++ ++typedef int cmp_t(const void *, const void *); ++ ++int ++main () ++{ ++ void *p = calloc (100, sizeof (arc_t **)); ++ spec_qsort (p, 100, 0, (int (*)(const void *, const void *))arc_compare); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +new file mode 100644 +index 000000000..723142c59 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +@@ -0,0 +1,54 @@ ++// Add a safe func mechanism. ++// avoid escape_unkown_field, "Type escapes via an unkown field accessed" ++// avoid escape_cast_void, "Type escapes a cast to/from void*" eg: GIMPLE_NOP ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++void ++__attribute__((noinline)) spec_qsort (void *a, size_t es) ++{ ++ char *pa; ++ char *pb; ++ int cmp_result; ++ ++ while ((*(arc_t **)a)->id < *((int *)a)) ++ { ++ if (cmp_result == 0) ++ { ++ spec_qsort (a, es); ++ pa = (char *)a - es; ++ a += es; ++ *(long *)pb = *(long *)pa; ++ } ++ else ++ { ++ a -= pa - pb; ++ } ++ } ++} ++ ++int ++main() ++{ ++ arc_p **arcs_pointer_sorted; ++ spec_qsort (arcs_pointer_sorted0, sizeof (arc_p)); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 43913104e..5a476e8f9 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -27,8 +27,21 @@ set STRUCT_REORG_TORTURE_OPTIONS list \ + + set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} + +-gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.c \ ++# -fipa-struct-reorg ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/wo_*.c \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/w_*.c \ + "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/struct_reorg*.cpp \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/sr_*.c \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/csr_*.c \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# -fipa-reorder-fields ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/rf_*.c \ ++ "" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program" + + # All done. 
+ torture-finish +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +index 6565fe8dd..23444fe8b 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" } + + struct a + { +@@ -21,4 +21,10 @@ int g(void) + return b->t; + } + ++int main() ++{ ++ f (); ++ return g (); ++} ++ + /* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +index 5864ad46f..2d1f95c99 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" } + + #include <stdlib.h> + typedef struct { +@@ -10,7 +10,7 @@ typedef struct { + compile_stack_elt_t *stack; + unsigned size; + } compile_stack_type; +-void f (const char *p, const char *pend, int c) ++__attribute__((noinline)) void f (const char *p, const char *pend, int c) + { + compile_stack_type compile_stack; + while (p != pend) +@@ -20,4 +20,9 @@ void f (const char *p, const char *pend, int c) + * sizeof (compile_stack_elt_t)); + } + ++int main() ++{ ++ f (NULL, NULL, 1); ++} ++ + /* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 98a5a490f..2b27c858a 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,6 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR 
(TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 56898e019..a9ec8ed21 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -527,6 +527,7 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); +-- +2.33.0 +
View file
_service:tar_scm:0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
Added
@@ -0,0 +1,1753 @@ +From 9d03b0a7741915e3a0172d60b9c21bf5abbda89e Mon Sep 17 00:00:00 2001 +From: Mingchuan Wu <wumingchuan1992@foxmail.com> +Date: Mon, 28 Aug 2023 18:11:02 +0800 +Subject: PATCH 22/22 DFE Add Dead Field Elimination in Struct-Reorg. + +We can transform gimple to eliminate fields that are never read +and replace the dead fields in stmt by creating a new ssa. +--- + gcc/common.opt | 4 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 240 +++++++++++++++++- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 8 + + gcc/opts.cc | 17 ++ + gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 86 +++++++ + .../gcc.dg/struct/dfe_ele_minus_verify.c | 60 +++++ + .../gcc.dg/struct/dfe_extr_board_init.c | 77 ++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 84 ++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 56 ++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 162 ++++++++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 126 +++++++++ + .../gcc.dg/struct/dfe_extr_mv_udc_core.c | 82 ++++++ + .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 58 +++++ + .../gcc.dg/struct/dfe_extr_ui_main.c | 61 +++++ + .../gcc.dg/struct/dfe_mem_ref_offset.c | 58 +++++ + .../struct/dfe_mul_layer_ptr_record_bug.c | 30 +++ + gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 71 ++++++ + .../gcc.dg/struct/dfe_ptr_negate_expr.c | 55 ++++ + gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 55 ++++ + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 + + .../struct/wo_prof_escape_replace_type.c | 49 ++++ + 21 files changed, 1436 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c + create mode 100644 
gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 14633c821..8bb735551 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1988,6 +1988,10 @@ fipa-struct-reorg + Common Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. + ++fipa-struct-reorg= ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3) ++-fipa-struct-reorg=0,1,2,3 adding none, struct-reorg, reorder-fields, dfe optimizations. ++ + fipa-vrp + Common Var(flag_ipa_vrp) Optimization + Perform IPA Value Range Propagation. +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 3e5f9538b..eac5fac7e 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -87,6 +87,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-pretty-print.h" + #include "gimple-pretty-print.h" + #include "gimple-iterator.h" ++#include "gimple-walk.h" + #include "cfg.h" + #include "ssa.h" + #include "tree-dfa.h" +@@ -268,10 +269,43 @@ enum srmode + STRUCT_REORDER_FIELDS + }; + ++/* Enum the struct layout optimize level, ++ which should be the same as the option -fstruct-reorg=. 
*/ ++ ++enum struct_layout_opt_level ++{ ++ NONE = 0, ++ STRUCT_REORG, ++ STRUCT_REORDER_FIELDS_SLO, ++ DEAD_FIELD_ELIMINATION ++}; ++ + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + static bool isptrptr (tree type); ++void get_base (tree &base, tree expr); + + srmode current_mode; ++hash_map<tree, tree> replace_type_map; ++ ++/* Return true if one of these types is created by struct-reorg. */ ++ ++static bool ++is_replace_type (tree type1, tree type2) ++{ ++ if (replace_type_map.is_empty ()) ++ return false; ++ if (type1 == NULL_TREE || type2 == NULL_TREE) ++ return false; ++ tree *type_value = replace_type_map.get (type1); ++ if (type_value) ++ if (types_compatible_p (*type_value, type2)) ++ return true; ++ type_value = replace_type_map.get (type2); ++ if (type_value) ++ if (types_compatible_p (*type_value, type1)) ++ return true; ++ return false; ++} + + } // anon namespace + +@@ -353,7 +387,8 @@ srfield::srfield (tree field, srtype *base) + fielddecl (field), + base (base), + type (NULL), +- clusternum (0) ++ clusternum (0), ++ field_access (EMPTY_FIELD) + { + for (int i = 0; i < max_split; i++) + newfieldi = NULL_TREE; +@@ -392,6 +427,25 @@ srtype::srtype (tree type) + } + } + ++/* Check it if all fields in the RECORD_TYPE are referenced. */ ++ ++bool ++srtype::has_dead_field (void) ++{ ++ bool may_dfe = false; ++ srfield *this_field; ++ unsigned i; ++ FOR_EACH_VEC_ELT (fields, i, this_field) ++ { ++ if (!(this_field->field_access & READ_FIELD)) ++ { ++ may_dfe = true; ++ break; ++ } ++ } ++ return may_dfe; ++} ++ + /* Mark the type as escaping type E at statement STMT. */ + + void +@@ -595,7 +649,17 @@ srtype::analyze (void) + into 2 different structures. In future we intend to add profile + info and/or static heuristics to differentiate splitting process. */ + if (fields.length () == 2) +- fields1->clusternum = 1; ++ { ++ /* Currently, when the replacement structure type exists, ++ we only split the replacement structure. 
*/ ++ for (hash_map<tree, tree>::iterator it = replace_type_map.begin (); ++ it != replace_type_map.end (); ++it) ++ { ++ if (types_compatible_p ((*it).second, this->type)) ++ return; ++ } ++ fields1->clusternum = 1; ++ } + + /* Otherwise we do nothing. */ + if (fields.length () >= 3) +@@ -838,6 +902,10 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fieldsi; ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && !(f->field_access & READ_FIELD)) ++ continue; + f->create_new_fields (newtype, newfields, newlast); + } + +@@ -856,6 +924,16 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && replace_type_map.get (this->newtype0) == NULL) ++ replace_type_map.put (this->newtype0, this->type); ++ if (dump_file) ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && has_dead_field ()) ++ fprintf (dump_file, "Dead field elimination.\n"); ++ } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1269,6 +1347,7 @@ public: + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); + + unsigned execute_struct_relayout (void); ++ bool remove_dead_field_stmt (tree lhs); + }; + + struct ipa_struct_relayout +@@ -3057,6 +3136,119 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + ++static HOST_WIDE_INT ++get_offset (tree op, HOST_WIDE_INT offset) ++{ ++ switch (TREE_CODE (op)) ++ { ++ case COMPONENT_REF: ++ { ++ return int_byte_position (TREE_OPERAND (op, 1)); ++ } ++ case MEM_REF: ++ { ++ return tree_to_uhwi (TREE_OPERAND (op, 1)); ++ } ++ default: ++ return offset; ++ } ++ return offset; ++} ++ ++/* Record field access. 
*/ ++static void ++record_field_access (tree type, HOST_WIDE_INT offset, ++ unsigned access, void *data) ++{ ++ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (type); ++ if (this_srtype == NULL) ++ return; ++ srfield *this_srfield = this_srtype->find_field (offset); ++ if (this_srfield == NULL) ++ return; ++ ++ this_srfield->field_access |= access; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "record field access %d:", access); ++ print_generic_expr (dump_file, type); ++ fprintf (dump_file, " field:"); ++ print_generic_expr (dump_file, this_srfield->fielddecl); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++ ++} ++ ++/* Update field_access in srfield. */ ++ ++static void ++update_field_access (tree node, tree op, unsigned access, void *data) ++{ ++ HOST_WIDE_INT offset = 0; ++ offset = get_offset (op, offset); ++ tree node_type = inner_type (TREE_TYPE (node)); ++ record_field_access (node_type, offset, access, data); ++ tree base = node; ++ get_base (base, node); ++ tree base_type = inner_type (TREE_TYPE (base)); ++ if (!types_compatible_p (base_type, node_type)) ++ { ++ record_field_access (base_type, get_offset (node, offset), ++ access, data); ++ } ++ return; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit store. */ ++ ++static bool ++find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) ++{ ++ update_field_access (node, op, WRITE_FIELD, data); ++ ++ return false; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit load. */ ++ ++static bool ++find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) ++{ ++ update_field_access (node, op, READ_FIELD, data); ++ ++ return false; ++} ++ ++/* Determine whether the stmt should be deleted. 
*/ ++ ++bool ++ipa_struct_reorg::remove_dead_field_stmt (tree lhs) ++{ ++ tree base = NULL_TREE; ++ bool indirect = false; ++ srtype *t = NULL; ++ srfield *f = NULL; ++ bool realpart = false; ++ bool imagpart = false; ++ bool address = false; ++ bool escape_from_base = false; ++ if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) ++ return false; ++ if (t ==NULL) ++ return false; ++ if (t->newtype0 == t->type) ++ return false; ++ if (f == NULL) ++ return false; ++ if (f->newfield0 == NULL) ++ return true; ++ return false; ++} ++ + /* Maybe record access of statement for further analaysis. */ + + void +@@ -3078,6 +3270,13 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ { ++ /* Look for loads and stores. */ ++ walk_stmt_load_store_ops (stmt, this, find_field_p_load, ++ find_field_p_store); ++ } + } + + /* Calculate the multiplier. */ +@@ -3368,8 +3567,11 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + } + else if (type != d->type) + { +- type->mark_escape (escape_cast_another_ptr, stmt); +- d->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (!is_replace_type (d->type->type, type->type)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } + } + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned +@@ -3949,8 +4151,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + } + /* If we have a non void* or a decl (which is hard to track), + then mark the type as escaping. 
*/ +- if (!VOID_POINTER_P (TREE_TYPE (newdecl)) +- || DECL_P (newdecl)) ++ if (replace_type_map.get (type->type) == NULL ++ && (!VOID_POINTER_P (TREE_TYPE (newdecl)) ++ || DECL_P (newdecl))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -4216,7 +4419,9 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + } + + srtype *t1 = find_type (inner_type (t)); +- if (t1 == type) ++ /* In the other side check, escape mark is added ++ when the replacement struct type exists. */ ++ if (t1 == type || is_replace_type (inner_type (t), type->type)) + { + /* In Complete Struct Relayout, if lhs type is the same + as rhs type, we could return without any harm. */ +@@ -5513,6 +5718,27 @@ bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; ++ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && remove_dead_field_stmt (gimple_assign_lhs (stmt))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\n rewriting statement (remove): \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ /* Replace the dead field in stmt by creating a dummy ssa. 
*/ ++ tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt))); ++ gimple_assign_set_lhs (stmt, dummy_ssa); ++ update_stmt (stmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "To: \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ } ++ + if (gimple_clobber_p (stmt)) + { + tree lhs = gimple_assign_lhs (stmt); +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index 6f85adeb4..719f7b308 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -143,6 +143,7 @@ public: + + bool create_new_type (void); + void analyze (void); ++ bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); + bool has_escaped (void) + { +@@ -164,6 +165,12 @@ public: + } + }; + ++/* Bitflags used for determining if a field ++ is never accessed, read or written. */ ++const unsigned EMPTY_FIELD = 0x0u; ++const unsigned READ_FIELD = 0x01u; ++const unsigned WRITE_FIELD = 0x02u; ++ + struct srfield + { + unsigned HOST_WIDE_INT offset; +@@ -175,6 +182,7 @@ struct srfield + unsigned clusternum; + + tree newfieldmax_split; ++ unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe). */ + + // Constructors + srfield (tree field, srtype *base); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index c3cc2c169..b868d189e 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -2957,6 +2957,23 @@ common_handle_option (struct gcc_options *opts, + SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value); + break; + ++ case OPT_fipa_struct_reorg_: ++ /* No break here - do -fipa-struct-reorg processing. */ ++ /* FALLTHRU. */ ++ case OPT_fipa_struct_reorg: ++ opts->x_flag_ipa_struct_reorg = value; ++ if (value && !opts->x_struct_layout_optimize_level) ++ { ++ /* Using the -fipa-struct-reorg option is equivalent to using ++ -fipa-struct-reorg=1. 
*/ ++ opts->x_struct_layout_optimize_level = 1; ++ } ++ break; ++ ++ case OPT_fipa_reorder_fields: ++ SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value); ++ break; ++ + case OPT_fprofile_generate_: + opts->x_profile_data_prefix = xstrdup (arg); + value = true; +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +new file mode 100644 +index 000000000..0c9e384c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -0,0 +1,86 @@ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array not handled yet". 
*/ ++network_t* net2; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net0 = (network_t*) calloc (1, sizeof(network_t)); ++ net0->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net0->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net0->arcs->id = net0->arcs->id + 2; ++ stop_arcs->cost = net0->arcs->id / 2; ++ stop_arcs->net_add = net0; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net0->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net1 != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +new file mode 100644 +index 000000000..717fcc386 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -0,0 +1,60 @@ ++// verify newarccmp-1.flow ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap0.id); ++ for (int i = 1; i < MAX; i++) ++ { ++ api-1.id = 500; ++ } ++ printf("%d\n", ap0.id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +new file mode 100644 +index 000000000..7723c240b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +@@ -0,0 +1,77 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_5__ TYPE_2__; ++typedef struct TYPE_4__ TYPE_1__; ++ ++struct TYPE_4__ ++{ ++ int Pin; ++ int Pull; ++ int Mode; ++ int Speed; ++}; ++ ++struct TYPE_5__ ++{ ++ int MEMRMP; ++}; ++typedef TYPE_1__ GPIO_InitTypeDef; ++ ++int BT_RST_PIN; ++int BT_RST_PORT; ++int CONN_POS10_PIN; ++int CONN_POS10_PORT; ++int GPIO_HIGH (int, int); ++int GPIO_MODE_INPUT; ++int GPIO_MODE_OUTPUT_PP; ++int GPIO_NOPULL; ++int GPIO_PULLUP; ++int GPIO_SPEED_FREQ_LOW; ++int HAL_GPIO_Init (int, TYPE_1__ *); ++scalar_t__ IS_GPIO_RESET (int, int); ++TYPE_2__ *SYSCFG; ++int __HAL_RCC_GPIOB_CLK_ENABLE (); ++int __HAL_RCC_GPIOC_CLK_ENABLE (); ++ ++__attribute__((used)) static void ++LBF_DFU_If_Needed (void) ++{ ++ GPIO_InitTypeDef GPIO_InitStruct; ++ __HAL_RCC_GPIOC_CLK_ENABLE (); ++ GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP; ++ GPIO_InitStruct.Pull = GPIO_NOPULL; ++ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW; ++ GPIO_InitStruct.Pin = BT_RST_PIN; ++ HAL_GPIO_Init (BT_RST_PORT, &GPIO_InitStruct); ++ ++ GPIO_HIGH (BT_RST_PORT, BT_RST_PIN); ++ __HAL_RCC_GPIOB_CLK_ENABLE (); ++ GPIO_InitStruct.Mode = GPIO_MODE_INPUT; ++ GPIO_InitStruct.Pull = GPIO_PULLUP; ++ GPIO_InitStruct.Pin = CONN_POS10_PIN; ++ HAL_GPIO_Init (CONN_POS10_PORT, &GPIO_InitStruct); ++ ++ if (IS_GPIO_RESET (CONN_POS10_PORT, CONN_POS10_PIN)) ++ { ++ SYSCFG->MEMRMP = 0x00000001; ++ asm ( ++ "LDR R0, =0x000000\n\t" ++ "LDR SP, R0, #0\n\t" ++ ); ++ asm ( ++ "LDR R0, R0, #0\n\t" ++ "BX R0\n\t" ++ ); ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead 
field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +new file mode 100644 +index 000000000..a1feac966 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +@@ -0,0 +1,84 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_2__ TYPE_1__; ++ ++struct net_device ++{ ++ struct claw_privbk* ml_priv; ++}; ++struct clawctl ++{ ++ int linkid; ++}; ++struct claw_privbk ++{ ++ int system_validate_comp; ++ TYPE_1__* p_env; ++ int ctl_bk; ++}; ++typedef int __u8; ++struct TYPE_2__ ++{ ++ scalar_t__ packing; ++ int api_type; ++}; ++ ++int CLAW_DBF_TEXT (int, int, char*); ++int CONNECTION_REQUEST; ++int HOST_APPL_NAME; ++scalar_t__ PACKING_ASK; ++scalar_t__ PACK_SEND; ++int WS_APPL_NAME_IP_NAME; ++int WS_APPL_NAME_PACKED; ++int claw_send_control (struct net_device*, int, int, int, int, int, int); ++int setup; ++ ++__attribute__((noinline)) int ++claw_send_control (struct net_device* net, int a, int b, int c, int d, int e, ++ int f) ++{ ++ return net->ml_priv->system_validate_comp + a + b + c + d + f; ++} ++ ++__attribute__((used)) static int ++claw_snd_conn_req (struct net_device *dev, __u8 link) ++{ ++ int rc; ++ struct claw_privbk *privptr = dev->ml_priv; ++ struct clawctl *p_ctl; ++ CLAW_DBF_TEXT (2, setup, "snd_conn"); ++ rc = 1; ++ p_ctl = (struct clawctl *)&privptr->ctl_bk; ++ p_ctl->linkid = link; ++ if (privptr->system_validate_comp == 0x00) ++ { ++ return rc; ++ } ++ if (privptr->p_env->packing == PACKING_ASK) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ WS_APPL_NAME_PACKED, WS_APPL_NAME_PACKED); ++ } ++ if (privptr->p_env->packing == PACK_SEND) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ WS_APPL_NAME_IP_NAME, 
WS_APPL_NAME_IP_NAME); ++ } ++ if (privptr->p_env->packing == 0) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ HOST_APPL_NAME, privptr->p_env->api_type); ++ } ++ return rc; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +new file mode 100644 +index 000000000..fd1e936ca +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++typedef int uint8_t; ++typedef int uint16_t; ++ ++struct TYPE_4__ ++{ ++ size_t cpu_id; ++}; ++ ++struct TYPE_3__ ++{ ++ int cpuc_dtrace_flags; ++}; ++ ++TYPE_2__ *CPU; ++volatile int CPU_DTRACE_FAULT; ++TYPE_1__ *cpu_core; ++scalar_t__ dtrace_load8 (uintptr_t); ++ ++__attribute__((used)) static int ++dtrace_bcmp (const void *s1, const void *s2, size_t len) ++{ ++ volatile uint16_t *flags; ++ flags = (volatile uint16_t *)&cpu_coreCPU->cpu_id.cpuc_dtrace_flags; ++ if (s1 == s2) ++ return (0); ++ if (s1 == NULL || s2 == NULL) ++ return (1); ++ if (s1 != s2 && len != 0) ++ { ++ const uint8_t *ps1 = s1; ++ const uint8_t *ps2 = s2; ++ do ++ { ++ if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++) ++ return (1); ++ } ++ while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); ++ } ++ return (0); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +new file mode 100644 +index 000000000..b13d785a9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -0,0 +1,162 @@ ++/* { dg-do compile} */ ++ 
++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct mrb_context ++{ ++ size_t stack; ++ size_t stbase; ++ size_t stend; ++ size_t eidx; ++ int *ci; ++ int *cibase; ++ int status; ++}; ++ ++struct RObject ++{ ++ int dummy; ++}; ++ ++struct RHash ++{ ++ int dummy; ++}; ++ ++struct RFiber ++{ ++ struct mrb_context *cxt; ++}; ++ ++struct RClass ++{ ++ int dummy; ++}; ++ ++struct RBasic ++{ ++ int tt; ++}; ++ ++struct RArray ++{ ++ int dummy; ++}; ++ ++typedef int mrb_state; ++typedef int mrb_gc; ++typedef int mrb_callinfo; ++size_t ARY_LEN (struct RArray *); ++size_t MRB_ENV_STACK_LEN (struct RBasic *); ++int MRB_FIBER_TERMINATED; ++ ++#define MRB_TT_ARRAY 140 ++#define MRB_TT_CLASS 139 ++#define MRB_TT_DATA 138 ++#define MRB_TT_ENV 137 ++#define MRB_TT_EXCEPTION 136 ++#define MRB_TT_FIBER 135 ++#define MRB_TT_HASH 134 ++#define MRB_TT_ICLASS 133 ++#define MRB_TT_MODULE 132 ++#define MRB_TT_OBJECT 131 ++#define MRB_TT_PROC 130 ++#define MRB_TT_RANGE 129 ++#define MRB_TT_SCLASS 128 ++ ++size_t ci_nregs (int *); ++int gc_mark_children (int *, int *, struct RBasic *); ++size_t mrb_gc_mark_hash_size (int *, struct RHash *); ++size_t mrb_gc_mark_iv_size (int *, struct RObject *); ++size_t mrb_gc_mark_mt_size (int *, struct RClass *); ++ ++__attribute__((used)) static size_t ++gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) ++{ ++ size_t children = 0; ++ gc_mark_children (mrb, gc, obj); ++ switch (obj->tt) ++ { ++ case MRB_TT_ICLASS: ++ children++; ++ break; ++ ++ case MRB_TT_CLASS: ++ case MRB_TT_SCLASS: ++ case MRB_TT_MODULE: ++ { ++ struct RClass *c = (struct RClass *)obj; ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_mt_size (mrb, c); ++ children ++; ++ } ++ break; ++ ++ case MRB_TT_OBJECT: ++ case MRB_TT_DATA: ++ case MRB_TT_EXCEPTION: ++ children += 
mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ break; ++ ++ case MRB_TT_ENV: ++ children += MRB_ENV_STACK_LEN (obj); ++ break; ++ ++ case MRB_TT_FIBER: ++ { ++ struct mrb_context *c = ((struct RFiber *)obj)->cxt; ++ size_t i; ++ mrb_callinfo *ci; ++ if (!c || c->status == MRB_FIBER_TERMINATED) ++ break; ++ ++ i = c->stack - c->stbase; ++ if (c->ci) ++ { ++ i += ci_nregs (c->ci); ++ } ++ if (c->stbase + i > c->stend) ++ i = c->stend - c->stbase; ++ ++ children += i; ++ children += c->eidx; ++ if (c->cibase) ++ { ++ for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++) ++ ; ++ } ++ children += i; ++ } ++ break; ++ ++ case MRB_TT_ARRAY: ++ { ++ struct RArray *a = (struct RArray *)obj; ++ children += ARY_LEN (a); ++ } ++ break; ++ ++ case MRB_TT_HASH: ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj); ++ break; ++ ++ case MRB_TT_PROC: ++ case MRB_TT_RANGE: ++ children += 2; ++ break; ++ default: ++ break; ++ } ++ ++ return children; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +new file mode 100644 +index 000000000..bc28a658a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -0,0 +1,126 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_6__ TYPE_3__; ++typedef struct TYPE_5__ TYPE_2__; ++typedef struct TYPE_4__ TYPE_1__; ++ ++struct io_accel2_cmd ++{ ++ int dummy; ++}; ++ ++struct hpsa_tmf_struct ++{ ++ int it_nexus; ++}; ++ ++struct hpsa_scsi_dev_t ++{ ++ int nphysical_disks; ++ int ioaccel_handle; ++ struct hpsa_scsi_dev_t **phys_disk; ++}; ++ ++struct ctlr_info ++{ ++ TYPE_3__ *pdev; ++ struct io_accel2_cmd 
*ioaccel2_cmd_pool; ++}; ++struct TYPE_4__ ++{ ++ int LunAddrBytes; ++}; ++ ++struct TYPE_5__ ++{ ++ TYPE_1__ LUN; ++}; ++ ++struct CommandList ++{ ++ size_t cmdindex; ++ int cmd_type; ++ struct hpsa_scsi_dev_t *phys_disk; ++ TYPE_2__ Header; ++}; ++ ++struct TYPE_6__ ++{ ++ int dev; ++}; ++ ++int BUG (); ++#define CMD_IOACCEL1 132 ++#define CMD_IOACCEL2 131 ++#define CMD_IOCTL_PEND 130 ++#define CMD_SCSI 129 ++#define IOACCEL2_TMF 128 ++int dev_err (int *, char *, int); ++scalar_t__ hpsa_is_cmd_idle (struct CommandList *); ++int le32_to_cpu (int); ++int test_memcmp (unsigned char *, int *, int); ++ ++__attribute__((used)) static bool ++hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, ++ struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr) ++{ ++ int i; ++ bool match = false; ++ struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_poolc->cmdindex; ++ struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2; ++ ++ if (hpsa_is_cmd_idle (c)) ++ return false; ++ ++ switch (c->cmd_type) ++ { ++ case CMD_SCSI: ++ case CMD_IOCTL_PEND: ++ match = !test_memcmp (scsi3addr, &c->Header.LUN.LunAddrBytes, ++ sizeof (c->Header.LUN.LunAddrBytes)); ++ break; ++ ++ case CMD_IOACCEL1: ++ case CMD_IOACCEL2: ++ if (c->phys_disk == dev) ++ { ++ match = true; ++ } ++ else ++ { ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_diski == c->phys_disk; ++ } ++ } ++ break; ++ ++ case IOACCEL2_TMF: ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_diski->ioaccel_handle == ++ le32_to_cpu (ac->it_nexus); ++ } ++ break; ++ ++ case 0: ++ match = false; ++ break; ++ default: ++ dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type); ++ BUG (); ++ } ++ ++ return match; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +new file mode 100644 +index 
000000000..0a585ac3d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +@@ -0,0 +1,82 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++typedef int u32; ++ ++struct mv_udc ++{ ++ TYPE_2__ *op_regs; ++ TYPE_1__ *ep_dqh; ++ struct mv_ep *eps; ++}; ++ ++struct mv_ep ++{ ++ TYPE_1__ *dqh; ++ struct mv_udc *udc; ++}; ++ ++struct TYPE_4__ ++{ ++ int *epctrlx; ++}; ++ ++struct TYPE_3__ ++{ ++ int max_packet_length; ++ int next_dtd_ptr; ++}; ++ ++int EP0_MAX_PKT_SIZE; ++int EPCTRL_RX_ENABLE; ++int EPCTRL_RX_EP_TYPE_SHIFT; ++int EPCTRL_TX_ENABLE; ++int EPCTRL_TX_EP_TYPE_SHIFT; ++int EP_QUEUE_HEAD_IOS; ++int EP_QUEUE_HEAD_MAX_PKT_LEN_POS; ++int EP_QUEUE_HEAD_NEXT_TERMINATE; ++int USB_ENDPOINT_XFER_CONTROL; ++int readl (int *); ++int writel (int, int *); ++ ++__attribute__((used)) static void ++ep0_reset (struct mv_udc *udc) ++{ ++ struct mv_ep *ep; ++ u32 epctrlx; ++ int i = 0; ++ for (i = 0; i < 2; i++) ++ { ++ ep = &udc->epsi; ++ ep->udc = udc; ++ ep->dqh = &udc->ep_dqhi; ++ ep->dqh->max_packet_length = ++ (EP0_MAX_PKT_SIZE << EP_QUEUE_HEAD_MAX_PKT_LEN_POS) ++ | EP_QUEUE_HEAD_IOS; ++ ep->dqh->next_dtd_ptr = EP_QUEUE_HEAD_NEXT_TERMINATE; ++ epctrlx = readl (&udc->op_regs->epctrlx0); ++ if (i) ++ { ++ epctrlx |= EPCTRL_TX_ENABLE ++ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_TX_EP_TYPE_SHIFT); ++ } ++ else ++ { ++ epctrlx |= EPCTRL_RX_ENABLE ++ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_RX_EP_TYPE_SHIFT); ++ } ++ writel (epctrlx, &udc->op_regs->epctrlx0); ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +new file mode 100644 +index 
000000000..bddd862fe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct tcpcb ++{ ++ int t_state; ++}; ++ ++struct socket ++{ ++ int dummy; ++}; ++ ++struct proc ++{ ++ int dummy; ++}; ++ ++struct inpcb ++{ ++ scalar_t__ inp_lport; ++}; ++ ++int COMMON_END (int); ++int COMMON_START (); ++int PRU_LISTEN; ++int TCPS_LISTEN; ++int in_pcbbind (struct inpcb *, int *, struct proc *); ++struct inpcb* sotoinpcb (struct socket *); ++ ++__attribute__((used)) static void ++tcp_usr_listen (struct socket *so, struct proc *p) ++{ ++ int error = 0; ++ struct inpcb *inp = sotoinpcb (so); ++ struct tcpcb *tp; ++ ++ COMMON_START (); ++ if (inp->inp_lport == 0) ++ { ++ error = in_pcbbind (inp, NULL, p); ++ } ++ if (error == 0) ++ { ++ tp->t_state = TCPS_LISTEN; ++ } ++ COMMON_END (PRU_LISTEN); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +new file mode 100644 +index 000000000..1a06f5eec +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++struct TYPE_4__ ++{ ++ size_t modCount; ++ TYPE_1__ *modList; ++}; ++ ++struct TYPE_3__ ++{ ++ void *modDescr; ++ void *modName; ++}; ++ ++size_t MAX_MODS; ++void *String_Alloc (char *); ++int test_strlen (char *); ++int trap_FD_GetFileList (char *, char *, char *, int); ++TYPE_2__ 
uiInfo; ++ ++__attribute__((used)) static void ++UI_LoadMods () ++{ ++ int numdirs; ++ char dirlist2048; ++ char *dirptr; ++ char *descptr; ++ int i; ++ int dirlen; ++ ++ uiInfo.modCount = 0; ++ numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist)); ++ dirptr = dirlist; ++ for (i = 0; i < numdirs; i++) ++ { ++ dirlen = test_strlen (dirptr) + 1; ++ descptr = dirptr + dirlen; ++ uiInfo.modListuiInfo.modCount.modName = String_Alloc (dirptr); ++ uiInfo.modListuiInfo.modCount.modDescr = String_Alloc (descptr); ++ dirptr += dirlen + test_strlen (descptr) + 1; ++ uiInfo.modCount++; ++ if (uiInfo.modCount >= MAX_MODS) ++ { ++ break; ++ } ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +new file mode 100644 +index 000000000..94eb88d5c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. ++ _1 = MEM(struct arc *)ap_4 + 72B.flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM(struct arc.reorder.0 *)ap.reorder.0_8 + 64B.flow. */ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. 
*/ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap1.flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +new file mode 100644 +index 000000000..bbf9420d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct T_HASH_ENTRY ++{ ++ unsigned int hash; ++ unsigned int klen; ++ char *key; ++} iHashEntry; ++ ++typedef struct T_HASH ++{ ++ unsigned int size; ++ unsigned int fill; ++ unsigned int keys; ++ ++ iHashEntry **array; ++} uHash; ++ ++uHash *retval; ++ ++int ++main() { ++ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +new file mode 100644 +index 000000000..f706db968 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -0,0 +1,71 @@ ++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid ++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt" ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; 
++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ arc_t *old_arcs; ++ node_t *node; ++ node_t *stop; ++ size_t off; ++ network_t* net; ++ ++ for( ; node->number < stop->number; node++ ) ++ { ++ off = node->basic_arc - old_arcs; ++ node->basic_arc = (arc_t *)(net->arcs + off); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +new file mode 100644 +index 000000000..963295cb4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -0,0 +1,55 @@ ++// support NEGATE_EXPR rewriting ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ int64_t susp = 0; ++ const int MAX = 100; ++ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ ap -= susp; ++ printf("%d\n", ap1.flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +new file mode 100644 +index 000000000..aa10506a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -0,0 +1,55 @@ ++// 
release escape_ptr_ptr, "Type is used in a pointer to a pointer not handled yet"; ++/* { dg-do compile } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_t **ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)0.id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 5a476e8f9..6ccb753b5 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -43,6 +43,10 @@ gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/csr_*.c \ + gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/rf_*.c \ + "" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program" + ++# -fipa-struct-reorg=3 ++gcc-dg-runtest lsort glob -nocomplain $srcdir/$subdir/dfe*.c \ ++ "" "-fipa-reorder-fields -fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ + # All done. 
+ torture-finish + dg-finish +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c +new file mode 100644 +index 000000000..fa8c66b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include <stdlib.h> ++ ++struct AngleDef ++{ ++ double K; ++ double th0; ++}; ++typedef struct AngleDef angldef; ++ ++struct bndangdihe ++{ ++ int nbond; ++ int nangl; ++ int ndihe; ++}; ++typedef struct bndangdihe bah; ++ ++struct ambprmtop ++{ ++ double *AnglK; ++ double *AnglEq; ++ bah nBAH; ++ angldef *AParam; ++ char source512; ++ char eprulesource512; ++}; ++typedef struct ambprmtop prmtop; ++ ++static void OrderBondParameters (prmtop *tp) ++{ ++ int i; ++ tp->AParam = (angldef *)malloc (tp->nBAH.nangl * sizeof (angldef)); ++ for (i = 0; i < tp->nBAH.nangl; i++) ++ { ++ tp->AParami.K = tp->AnglKi; ++ tp->AParami.th0 = tp->AnglEqi; ++ } ++} ++ ++void main () ++{ ++ prmtop *tp = (prmtop *)malloc (100 * sizeof (prmtop)); ++ OrderBondParameters (tp); ++} ++ ++/*---------------------------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +-- +2.33.0 +
View file
_service
Changed
@@ -1,8 +1,8 @@ <services> <service name="tar_scm"> <param name="scm">git</param> - <param name="url">git@gitee.com:laokz/gcc.git</param> - <param name="revision">35dcf91c76f317d04a4e37c351fc9c2e574dbef1</param> + <param name="url">git@gitee.com:src-openeuler/gcc.git</param> + <param name="revision">master</param> <param name="exclude">*</param> <param name="extract">*</param> </service>
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2