开源软件构建与测试

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

Changes of Revision 4

_service:tar_scm:gcc.spec Changed

@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 19
+%global gcc_release 22
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -166,6 +166,33 @@
 Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
 Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
 Patch28: 0028-Array-widen-compare-Fix-the-return-value-match-after.patch
+Patch29: 0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
+Patch30: 0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
+Patch31: 0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
+Patch32: 0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
+Patch33: 0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
+Patch34: 0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
+Patch35: 0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch
+Patch36: 0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch           
+Patch37: 0037-Perform-early-if-conversion-of-simple-arithmetic.patch      
+Patch38: 0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch  
+Patch39: 0039-Match-double-sized-mul-pattern.patch                        
+Patch40: 0040-Port-icp-patch-to-GCC-12.patch                              
+Patch41: 0041-Port-fixes-in-icp-to-GCC-12.patch
+Patch42: 0042-Add-split-complex-instructions-pass.patch                   
+Patch43: 0043-Extending-and-refactoring-of-pass_split_complex_inst.patch
+Patch44: 0044-Port-maxmin-patch-to-GCC-12.patch
+Patch45: 0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch
+Patch46: 0046-Add-new-pattern-to-pass-the-maxmin-tests.patch
+Patch47: 0047-AES-Implement-AES-pattern-matching.patch
+Patch48: 0048-crypto-accel-add-optimization-level-requirement-to-t.patch
+Patch49: 0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch
+Patch50: 0050-Port-IPA-prefetch-to-GCC-12.patch
+Patch51: 0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
+Patch52: 0052-Fix-fails-in-IPA-prefetch-src-openEuler-gcc-I96ID7.patch
+Patch53: 0053-struct-reorg-Add-Semi-Relayout.patch
+Patch54: 0054-Struct-Reorg-Bugfix-for-structure-pointer-compressio.patch
+Patch55: 0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch
 
 # Part 3000 ~ 4999
 %ifarch loongarch64
@@ -789,6 +816,33 @@
 %patch26 -p1
 %patch27 -p1
 %patch28 -p1
+%patch29 -p1
+%patch30 -p1
+%patch31 -p1
+%patch32 -p1
+%patch33 -p1
+%patch34 -p1
+%patch35 -p1
+%patch36 -p1
+%patch37 -p1
+%patch38 -p1
+%patch39 -p1
+%patch40 -p1
+%patch41 -p1
+%patch42 -p1
+%patch43 -p1
+%patch44 -p1
+%patch45 -p1
+%patch46 -p1
+%patch47 -p1
+%patch48 -p1
+%patch49 -p1
+%patch50 -p1
+%patch51 -p1
+%patch52 -p1
+%patch53 -p1
+%patch54 -p1
+%patch55 -p1
 
 %ifarch loongarch64
 %patch3001 -p1
@@ -3174,6 +3228,18 @@
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Fri Apr 12 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-22
+- Type: Sync
+- DESC: Sync patch from openeuler/gcc
+
+* Thu Apr 11 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-21
+- Type: Sync
+- DESC: Sync patch from openeuler/gcc
+
+* Thu Apr 11 2024 Zhenyu Zhao <zhaozhenyu17@huawei.com> - 12.3.1-20
+- Type: Sync
+- DESC: Sync patch from openeuler/gcc
+
 * Mon Apr 1 2024 Peng Fan <fanpeng@loongson.cn> 12.3.1-19
 - Type: SPEC
 - DESC: fix libcc1 file path for LoongArch.

_service:tar_scm:0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch Added

@@ -0,0 +1,1191 @@
+From 7930d75c9fd3f36cc2dce934569f00c71248bb31 Mon Sep 17 00:00:00 2001
+From: liyancheng <412998149@qq.com>
+Date: Sat, 25 Nov 2023 10:28:48 +0800
+Subject: [PATCH] [Struct Reorg] Add Safe Structure Pointer Compression
+
+Safe structure pointer compression allows safely transfer pointers
+stored in structure into the index of structure array with smaller
+type to reduce the size of structure.
+Add flag -fipa-struct-reorg=4 to enable safe structure pointer
+compression.
+Add param compressed-pointer-size=8,16,32 to control the compressed
+pointer size.
+---
+ gcc/common.opt                           |   5 +-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 908 ++++++++++++++++++++++-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h  |   4 +
+ gcc/params.opt                           |   4 +
+ 4 files changed, 882 insertions(+), 39 deletions(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b01df919e..f6e20c1e8 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1993,8 +1993,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ 
+ fipa-struct-reorg=
+-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3)
+--fipa-struct-reorg=0,1,2,3 adding none, struct-reorg, reorder-fields, dfe optimizations.
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4)
++-fipa-struct-reorg=0,1,2,3,4 adding none, struct-reorg, reorder-fields,
++dfe, safe-pointer-compression optimizations.
+ 
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index dcc6df496..5d451c4c8 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -89,6 +89,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "gimple-iterator.h"
+ #include "gimple-walk.h"
+ #include "cfg.h"
++#include "cfghooks.h" /* For split_block.  */
+ #include "ssa.h"
+ #include "tree-dfa.h"
+ #include "fold-const.h"
+@@ -147,7 +148,27 @@ using namespace struct_relayout;
+ #define VOID_POINTER_P(type) \
+   (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
+ 
+-/* Return true iff TYPE is stdarg va_list type.  */
++static void
++set_var_attributes (tree var)
++{
++  if (!var)
++    return;
++  gcc_assert (TREE_CODE (var) == VAR_DECL);
++
++  DECL_ARTIFICIAL (var) = 1;
++  DECL_EXTERNAL (var) = 0;
++  TREE_STATIC (var) = 1;
++  TREE_PUBLIC (var) = 0;
++  TREE_USED (var) = 1;
++  DECL_CONTEXT (var) = NULL;
++  TREE_THIS_VOLATILE (var) = 0;
++  TREE_ADDRESSABLE (var) = 0;
++  TREE_READONLY (var) = 0;
++  if (is_global_var (var))
++    set_decl_tls_model (var, TLS_MODEL_NONE);
++}
++
++/* Return true if TYPE is stdarg va_list type.  */
+ 
+ static inline bool
+ is_va_list_type (tree type)
+@@ -271,9 +292,15 @@ enum struct_layout_opt_level
+   STRUCT_SPLIT = 1 << 0,
+   COMPLETE_STRUCT_RELAYOUT = 1 << 1,
+   STRUCT_REORDER_FIELDS = 1 << 2,
+-  DEAD_FIELD_ELIMINATION = 1 << 3
++  DEAD_FIELD_ELIMINATION = 1 << 3,
++  POINTER_COMPRESSION_SAFE = 1 << 4
+ };
+ 
++/* Defines the target pointer size of compressed pointer, which should be 8,
++   16, 32.  */
++
++static int compressed_size = 32;
++
+ static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
+ static bool isptrptr (tree type);
+ void get_base (tree &base, tree expr);
+@@ -394,7 +421,10 @@ srtype::srtype (tree type)
+   : type (type),
+     chain_type (false),
+     escapes (does_not_escape),
++    pc_gptr (NULL_TREE),
+     visited (false),
++    pc_candidate (false),
++    has_legal_alloc_num (false),
+     has_alloc_array (0)
+ {
+   for (int i = 0; i < max_split; i++)
+@@ -476,6 +506,31 @@ srtype::mark_escape (escape_type e, gimple *stmt)
+     }
+ }
+ 
++/* Create a global header for compressed struct.  */
++
++void
++srtype::create_global_ptr_for_pc ()
++{
++  if (!pc_candidate || pc_gptr != NULL_TREE)
++    return;
++
++  const char *type_name = get_type_name (type);
++  gcc_assert (type_name != NULL);
++
++  char *gptr_name = concat (type_name, "_pc", NULL);
++  tree new_name = get_identifier (gptr_name);
++  tree new_type = build_pointer_type (newtype[0]);
++  tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type);
++  set_var_attributes (new_var);
++  pc_gptr = new_var;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "\nType: %s has create global header for pointer"
++	       " compression: %s\n", type_name, gptr_name);
++
++  free (gptr_name);
++}
++
+ /* Add FIELD to the list of fields that use this type.  */
+ 
+ void
+@@ -798,15 +853,31 @@ srfield::create_new_reorder_fields (tree newtype[max_split],
+       fields.safe_push (field);
+     }
+ 
+-  DECL_NAME (field) = DECL_NAME (fielddecl);
+   if (type == NULL)
+-    /* Common members do not need to reconstruct.
++    {
++      DECL_NAME (field) = DECL_NAME (fielddecl);
++      /* Common members do not need to reconstruct.
+        Otherwise, int* -> int** or void* -> void**.  */
+-    TREE_TYPE (field) = nt;
++      TREE_TYPE (field) = nt;
++      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
++    }
++  else if (type->pc_candidate)
++    {
++      const char *old_name = IDENTIFIER_POINTER (DECL_NAME (fielddecl));
++      char *new_name = concat (old_name, "_pc", NULL);
++      DECL_NAME (field) = get_identifier (new_name);
++      free (new_name);
++      TREE_TYPE (field) = make_unsigned_type (compressed_size);
++      SET_DECL_ALIGN (field, compressed_size);
++    }
+   else
+-    TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt);
++    {
++      TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt);
++      DECL_NAME (field) = DECL_NAME (fielddecl);
++      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
++    }
++
+   DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl);
+-  SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
+   DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl);
+   TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl);
+   DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl);
+@@ -925,6 +996,10 @@ srtype::create_new_type (void)
+ 	  && has_dead_field ())
+ 	fprintf (dump_file, "Dead field elimination.\n");
+     }
++
++  if (pc_candidate && pc_gptr == NULL_TREE)
++    create_global_ptr_for_pc ();
++
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "Created %d types:\n", maxclusters);
+@@ -1338,6 +1413,30 @@ public:
+ 
+   unsigned execute_struct_relayout (void);
+   bool remove_dead_field_stmt (tree lhs);
++
++  // Pointer compression methods:
++  void check_and_prune_struct_for_pointer_compression (void);
++  void try_rewrite_with_pointer_compression (gassign *, gimple_stmt_iterator *,
++					     tree, tree, tree &, tree &);
++  bool safe_void_cmp_p (tree, srtype *);
++  bool pc_candidate_st_type_p (tree);
++  bool pc_candidate_tree_p (tree);
++  bool pc_type_conversion_candidate_p (tree);
++  bool pc_direct_rewrite_chance_p (tree, tree &);
++  bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);

_service:tar_scm:0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch Added

@@ -0,0 +1,1232 @@
+From 82d6166cd29fb1c3474f29b28cb7e5478d3a551a Mon Sep 17 00:00:00 2001
+From: liyancheng <412998149@qq.com>
+Date: Mon, 25 Dec 2023 11:17:04 +0800
+Subject: [PATCH] [Struct Reorg] Add unsafe structure pointer compression
+
+Unsafe structure pointer compression allows for some dangerous
+conversions for better performance.
+Add flag -fipa-struct-reorg=5 to enable unsafe structure pointer
+compression.
+---
+ gcc/common.opt                                |   6 +-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 365 ++++++++++++++----
+ gcc/symbol-summary.h                          |  22 +-
+ .../gcc.dg/struct/csr_skip_void_struct_name.c |  53 +++
+ gcc/testsuite/gcc.dg/struct/pc_cast_int.c     |  91 +++++
+ .../gcc.dg/struct/pc_compress_and_decomress.c |  90 +++++
+ gcc/testsuite/gcc.dg/struct/pc_ptr2void.c     |  87 +++++
+ .../gcc.dg/struct/pc_simple_rewrite_pc.c      | 112 ++++++
+ .../gcc.dg/struct/pc_skip_void_struct_name.c  |  53 +++
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   8 +
+ 10 files changed, 804 insertions(+), 83 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_cast_int.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_ptr2void.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 56b547506..c7c6bc256 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1993,9 +1993,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ 
+ fipa-struct-reorg=
+-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4)
+--fipa-struct-reorg=0,1,2,3,4 adding none, struct-reorg, reorder-fields,
+-dfe, safe-pointer-compression optimizations.
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5)
++-fipa-struct-reorg=0,1,2,3,4,5 adding none, struct-reorg, reorder-fields,
++dfe, safe-pointer-compression, unsafe-pointer-compression optimizations.
+ 
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 5d451c4c8..fa33f2d35 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -293,7 +293,8 @@ enum struct_layout_opt_level
+   COMPLETE_STRUCT_RELAYOUT = 1 << 1,
+   STRUCT_REORDER_FIELDS = 1 << 2,
+   DEAD_FIELD_ELIMINATION = 1 << 3,
+-  POINTER_COMPRESSION_SAFE = 1 << 4
++  POINTER_COMPRESSION_SAFE = 1 << 4,
++  POINTER_COMPRESSION_UNSAFE = 1 << 5
+ };
+ 
+ /* Defines the target pointer size of compressed pointer, which should be 8,
+@@ -1267,10 +1268,10 @@ csrtype::init_type_info (void)
+ 
+   /* Close enough to pad to improve performance.
+      33~63 should pad to 64 but 33~48 (first half) are too far away, and
+-     65~127 should pad to 128 but 65~96 (first half) are too far away.  */
++     70~127 should pad to 128 but 65~70 (first half) are too far away.  */
+   if (old_size > 48 && old_size < 64)
+     new_size = 64;
+-  if (old_size > 96 && old_size < 128)
++  if (old_size > 70 && old_size < 128)
+     new_size = 128;
+ 
+   /* For performance reasons, only allow structure size
+@@ -1423,8 +1424,12 @@ public:
+   bool pc_candidate_tree_p (tree);
+   bool pc_type_conversion_candidate_p (tree);
+   bool pc_direct_rewrite_chance_p (tree, tree &);
++  bool pc_simplify_chance_for_compress_p (gassign *, tree);
++  bool compress_candidate_without_check (gimple_stmt_iterator *, tree, tree &);
+   bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
+   bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &);
++  bool decompress_candidate_without_check (gimple_stmt_iterator *,
++					   tree, tree, tree &, tree &);
+   bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
+   bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &,
+ 			     tree &);
+@@ -1924,7 +1929,6 @@ bool
+ ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
+ 					HOST_WIDE_INT &times)
+ {
+-  bool ret = false;
+   gcc_assert (TREE_CODE (cst) == INTEGER_CST);
+ 
+   gimple *stmt = gsi_stmt (*gsi);
+@@ -1948,27 +1952,95 @@ ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
+     {
+       if (gsi_one_before_end_p (*gsi))
+ 	return false;
+-      gsi_next (gsi);
+-      gimple *stmt2 = gsi_stmt (*gsi);
+-
+-      if (gimple_code (stmt2) == GIMPLE_ASSIGN
+-	  && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR)
++      // Check uses.
++      imm_use_iterator imm_iter_lhs;
++      use_operand_p use_p_lhs;
++      FOR_EACH_IMM_USE_FAST (use_p_lhs, imm_iter_lhs, gimple_assign_lhs (stmt))
+ 	{
+-	  tree lhs = gimple_assign_lhs (stmt2);
+-	  tree rhs1 = gimple_assign_rhs1 (stmt2);
+-	  if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
+-	      || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type))
++	  gimple *stmt2 = USE_STMT (use_p_lhs);
++	  if (gimple_code (stmt2) != GIMPLE_ASSIGN)
++	    continue;
++	  if (gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR)
+ 	    {
+-	      tree num = NULL;
+-	      if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type)))
++	      tree lhs = gimple_assign_lhs (stmt2);
++	      tree rhs1 = gimple_assign_rhs1 (stmt2);
++	      if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
++		  || types_compatible_p (inner_type (TREE_TYPE (lhs)),
++					 ctype.type))
+ 		{
+-		  times = TREE_INT_CST_LOW (num);
+-		  ret = true;
++		  tree num = NULL;
++		  if (is_result_of_mult (cst, &num,
++					 TYPE_SIZE_UNIT (ctype.type)))
++		    {
++		      times = TREE_INT_CST_LOW (num);
++		      return true;
++		    }
++		}
++	    }
++	  // For pointer compression, handle plus stmt.
++	  else if (gimple_assign_rhs_code (stmt2) == PLUS_EXPR)
++	    {
++	      // Check uses.
++	      imm_use_iterator imm_iter_cast;
++	      use_operand_p use_p_cast;
++	      FOR_EACH_IMM_USE_FAST (use_p_cast, imm_iter_cast,
++				     gimple_assign_lhs (stmt2))
++		{
++		  gimple *stmt_cast = USE_STMT (use_p_cast);
++		  if (gimple_code (stmt_cast) != GIMPLE_ASSIGN)
++		    continue;
++		  if (gimple_assign_cast_p (stmt_cast))
++		    {
++		      tree lhs_type = inner_type (TREE_TYPE (
++					gimple_assign_lhs (stmt_cast)));
++		      if (types_compatible_p (lhs_type, ctype.type))
++			{
++			  tree num = NULL;
++			  if (is_result_of_mult (cst, &num,
++						 TYPE_SIZE_UNIT (ctype.type)))
++			    {
++			      times = TREE_INT_CST_LOW (num);
++			      return true;
++			    }
++			}
++		    }
+ 		}
+ 	    }
+ 	}
+-      gsi_prev (gsi);
+-      return ret;
++    }
++  // For pointer compression, handle div stmt.
++  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR)
++    {
++      imm_use_iterator imm_iter;
++      use_operand_p use_p;
++      tree lhs = gimple_assign_lhs (stmt);
++      if (lhs == NULL_TREE)
++	return false;
++      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
++	{
++	  gimple *use_stmt = USE_STMT (use_p);
++	  if (is_gimple_debug (use_stmt))
++	    continue;
++	  if (gimple_code (use_stmt) != GIMPLE_ASSIGN)
++	    continue;
++	  if (gimple_assign_cast_p (use_stmt))
++	    {
++	      tree lhs_type = inner_type (TREE_TYPE (
++				gimple_assign_lhs (use_stmt)));
++	      if (TYPE_UNSIGNED (lhs_type)
++		  && TREE_CODE (lhs_type) == INTEGER_TYPE
++		  && TYPE_PRECISION (lhs_type) == compressed_size)
++		{
++		  tree num = NULL;
++		  if (is_result_of_mult (cst, &num,
++					 TYPE_SIZE_UNIT (ctype.type)))
++		    {
++		      times = TREE_INT_CST_LOW (num);
++		      return true;
++		    }
++		}

_service:tar_scm:0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch Added

@@ -0,0 +1,550 @@
+From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
+From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
+Date: Tue, 5 Dec 2023 15:33:08 +0800
+Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
+ [1/3]
+
+---
+ gcc/common.opt |   8 +
+ gcc/final.cc   | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
+ gcc/opts.cc    |  61 ++++++++
+ 3 files changed, 473 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b01df919e..e69947fc2 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2546,6 +2546,14 @@ freorder-functions
+ Common Var(flag_reorder_functions) Optimization
+ Reorder functions to improve code placement.
+ 
++fauto-bolt
++Common Var(flag_auto_bolt)
++Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
++
++fauto-bolt=
++Common Joined RejectNegative
++Specify the feedback data directory required by BOLT-plugin.  The default is the current directory.
++
+ frerun-cse-after-loop
+ Common Var(flag_rerun_cse_after_loop) Optimization
+ Add a common subexpression elimination pass after loop optimizations.
+diff --git a/gcc/final.cc b/gcc/final.cc
+index a9868861b..d4c4fa08f 100644
+--- a/gcc/final.cc
++++ b/gcc/final.cc
+@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "rtl-iter.h"
+ #include "print-rtl.h"
+ #include "function-abi.h"
++#include "insn-codes.h"
+ #include "common/common-target.h"
+ 
+ #ifdef XCOFF_DEBUGGING_INFO
+@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
+       }
+ }
+ #endif
+-&#xc;
++
++#define ASM_FDO_SECTION_PREFIX ".text.fdo."
++
++#define ASM_FDO_CALLER_FLAG ".fdo.caller "
++#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
++#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
++
++#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
++
++/* Return the relative offset address of the start instruction of BB,
++   return -1 if it is empty instruction.    */
++
++static int 
++get_bb_start_addr (basic_block bb)
++{
++  rtx_insn *insn;
++  FOR_BB_INSNS (bb, insn)
++    {
++      if (!INSN_P (insn))
++	{
++	  continue;
++	}
++      /* The jump target of call is not in this function, so
++	 it should be excluded.    */
++      if (CALL_P (insn))
++        {
++	  return -1;
++	}
++
++      int insn_code = recog_memoized (insn);
++
++      /* The instruction NOP in llvm-bolt belongs to the previous
++	 BB, so it needs to be skipped.   */
++      if (insn_code != CODE_FOR_nop)
++        {
++	  return INSN_ADDRESSES (INSN_UID (insn));
++	}
++    }
++  return -1;
++}
++
++/* Return the relative offset address of the end instruction of BB,
++   return -1 if it is empty or call instruction.    */
++
++static int
++get_bb_end_addr (basic_block bb)
++{
++  rtx_insn *insn;
++  int num_succs = EDGE_COUNT (bb->succs);
++  FOR_BB_INSNS_REVERSE (bb, insn)
++    {
++      if (!INSN_P (insn))
++        {
++	  continue;
++	}
++      /* The jump target of call is not in this function, so
++	 it should be excluded.     */
++      if (CALL_P (insn))
++        {
++	  return -1;
++	}
++      if ((num_succs == 1)
++	   || ((num_succs == 2) && any_condjump_p (insn)))
++	{
++	  return INSN_ADDRESSES (INSN_UID (insn));
++	}
++      else
++        {
++	  return -1;
++	}
++    }
++  return -1;
++}
++
++/* Return the end address of cfun.    */
++
++static int 
++get_function_end_addr ()
++{
++  rtx_insn *insn = get_last_insn ();
++  for (; insn != get_insns (); insn = PREV_INSN (insn))
++    {
++      if (!INSN_P (insn))
++        {
++	  continue;
++	}
++      return INSN_ADDRESSES (INSN_UID (insn));
++    }
++	  
++  return -1;
++} 
++
++/* Return the function profile status string.    */
++
++static const char * 
++get_function_profile_status () 
++{
++  const char *profile_status[] = {
++    "PROFILE_ABSENT",
++    "PROFILE_GUESSED",
++    "PROFILE_READ",
++    "PROFILE_LAST"     /* Last value, used by profile streaming.    */
++  };
++
++  return profile_status[profile_status_for_fn (cfun)];
++}
++
++/* Return the count from the feedback data, such as PGO or AutoFDO.    */
++
++inline static gcov_type 
++get_fdo_count (profile_count count)
++{
++  return count.quality () >= GUESSED 
++         ? count.to_gcov_type () : 0;
++}
++
++/* Return the profile quality string.    */
++
++static const char *
++get_fdo_count_quality (profile_count count)
++{
++  const char *profile_quality[] = {
++    "UNINITIALIZED_PROFILE",
++    "GUESSED_LOCAL",
++    "GUESSED_GLOBAL0",
++    "GUESSED_GLOBAL0_ADJUSTED",
++    "GUESSED",
++    "AFDO",
++    "ADJUSTED",
++    "PRECISE"
++  };
++
++  return profile_quality[count.quality ()];
++}
++
++static const char *
++alias_local_functions (const char *fnname)
++{
++  if (TREE_PUBLIC (cfun->decl))
++    {
++      return fnname;
++    }
++  return concat (fnname, "/", lbasename (dump_base_name), NULL);
++}
++
++/* Return function bind type string.    */
++
++static const char * 
++simple_get_function_bind ()
++{
++  const char *function_bind[] = {

_service:tar_scm:0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch Added

@@ -0,0 +1,34094 @@
+From 82f9f48406955a6150def998b69b4eace4bd51eb Mon Sep 17 00:00:00 2001
+From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
+Date: Thu, 7 Dec 2023 11:43:08 +0800
+Subject: [PATCH] [AutoBOLT] Add bolt linker plugin [2/3]
+
+---
+ bolt-plugin/Makefile       |   675 ++
+ bolt-plugin/Makefile.am    |    43 +
+ bolt-plugin/Makefile.in    |   675 ++
+ bolt-plugin/aclocal.m4     | 10250 +++++++++++++++++
+ bolt-plugin/bolt-plugin.cc |  1153 ++
+ bolt-plugin/config.h.in    |   179 +
+ bolt-plugin/configure      | 20909 +++++++++++++++++++++++++++++++++++
+ bolt-plugin/configure.ac   |    60 +
+ gcc/common.opt             |    16 +
+ gcc/opts.cc                |    27 +-
+ 10 files changed, 33985 insertions(+), 2 deletions(-)
+ create mode 100644 bolt-plugin/Makefile
+ create mode 100644 bolt-plugin/Makefile.am
+ create mode 100644 bolt-plugin/Makefile.in
+ create mode 100644 bolt-plugin/aclocal.m4
+ create mode 100644 bolt-plugin/bolt-plugin.cc
+ create mode 100644 bolt-plugin/config.h.in
+ create mode 100755 bolt-plugin/configure
+ create mode 100644 bolt-plugin/configure.ac
+
+diff --git a/bolt-plugin/Makefile b/bolt-plugin/Makefile
+new file mode 100644
+index 000000000..82a4bc2c6
+--- /dev/null
++++ b/bolt-plugin/Makefile
+@@ -0,0 +1,675 @@
++# Makefile.in generated by automake 1.16.5 from Makefile.am.
++# Makefile.  Generated from Makefile.in by configure.
++
++# Copyright (C) 1994-2021 Free Software Foundation, Inc.
++
++# This Makefile.in is free software; the Free Software Foundation
++# gives unlimited permission to copy and/or distribute it,
++# with or without modifications, as long as this notice is preserved.
++
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
++# PARTICULAR PURPOSE.
++
++
++
++
++am__is_gnu_make = { \
++  if test -z '$(MAKELEVEL)'; then \
++    false; \
++  elif test -n '$(MAKE_HOST)'; then \
++    true; \
++  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
++    true; \
++  else \
++    false; \
++  fi; \
++}
++am__make_running_with_option = \
++  case $${target_option-} in \
++      ?) ;; \
++      *) echo "am__make_running_with_option: internal error: invalid" \
++              "target option '$${target_option-}' specified" >&2; \
++         exit 1;; \
++  esac; \
++  has_opt=no; \
++  sane_makeflags=$$MAKEFLAGS; \
++  if $(am__is_gnu_make); then \
++    sane_makeflags=$$MFLAGS; \
++  else \
++    case $$MAKEFLAGS in \
++      *\\[\ \	]*) \
++        bs=\\; \
++        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
++          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
++    esac; \
++  fi; \
++  skip_next=no; \
++  strip_trailopt () \
++  { \
++    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
++  }; \
++  for flg in $$sane_makeflags; do \
++    test $$skip_next = yes && { skip_next=no; continue; }; \
++    case $$flg in \
++      *=*|--*) continue;; \
++        -*I) strip_trailopt 'I'; skip_next=yes;; \
++      -*I?*) strip_trailopt 'I';; \
++        -*O) strip_trailopt 'O'; skip_next=yes;; \
++      -*O?*) strip_trailopt 'O';; \
++        -*l) strip_trailopt 'l'; skip_next=yes;; \
++      -*l?*) strip_trailopt 'l';; \
++      -[dEDm]) skip_next=yes;; \
++      -[JT]) skip_next=yes;; \
++    esac; \
++    case $$flg in \
++      *$$target_option*) has_opt=yes; break;; \
++    esac; \
++  done; \
++  test $$has_opt = yes
++am__make_dryrun = (target_option=n; $(am__make_running_with_option))
++am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
++pkgdatadir = $(datadir)/bolt-plugin
++pkgincludedir = $(includedir)/bolt-plugin
++pkglibdir = $(libdir)/bolt-plugin
++pkglibexecdir = $(libexecdir)/bolt-plugin
++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
++install_sh_DATA = $(install_sh) -c -m 644
++install_sh_PROGRAM = $(install_sh) -c
++install_sh_SCRIPT = $(install_sh) -c
++INSTALL_HEADER = $(INSTALL_DATA)
++transform = $(program_transform_name)
++NORMAL_INSTALL = :
++PRE_INSTALL = :
++POST_INSTALL = :
++NORMAL_UNINSTALL = :
++PRE_UNINSTALL = :
++POST_UNINSTALL = :
++build_triplet = aarch64-unknown-linux-gnu
++host_triplet = aarch64-unknown-linux-gnu
++target_triplet = aarch64-unknown-linux-gnu
++subdir = .
++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
++am__aclocal_m4_deps = $(top_srcdir)/configure.ac
++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
++	$(ACLOCAL_M4)
++DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
++	$(am__configure_deps)
++am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
++ configure.lineno config.status.lineno
++mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
++CONFIG_HEADER = config.h
++CONFIG_CLEAN_FILES =
++CONFIG_CLEAN_VPATH_FILES =
++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
++am__vpath_adj = case $$p in \
++    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
++    *) f=$$p;; \
++  esac;
++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
++am__install_max = 40
++am__nobase_strip_setup = \
++  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
++am__nobase_strip = \
++  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
++am__nobase_list = $(am__nobase_strip_setup); \
++  for p in $$list; do echo "$$p $$p"; done | \
++  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
++  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
++    if (++n[$$2] == $(am__install_max)) \
++      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
++    END { for (dir in files) print dir, files[dir] }'
++am__base_list = \
++  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
++  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
++am__uninstall_files_from_dir = { \
++  test -z "$$files" \
++    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
++    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
++         $(am__cd) "$$dir" && rm -f $$files; }; \
++  }
++am__installdirs = "$(DESTDIR)$(libexecsubdir)"
++LTLIBRARIES = $(libexecsub_LTLIBRARIES)
++am_libbolt_plugin_la_OBJECTS = bolt-plugin.lo
++libbolt_plugin_la_OBJECTS = $(am_libbolt_plugin_la_OBJECTS)
++AM_V_P = $(am__v_P_$(V))
++am__v_P_ = $(am__v_P_$(AM_DEFAULT_VERBOSITY))
++am__v_P_0 = false
++am__v_P_1 = :
++AM_V_GEN = $(am__v_GEN_$(V))
++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY))
++am__v_GEN_0 = @echo "  GEN     " $@;
++am__v_GEN_1 = 
++AM_V_at = $(am__v_at_$(V))
++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY))
++am__v_at_0 = @
++am__v_at_1 = 
++DEFAULT_INCLUDES = -I.
++depcomp =
++am__maybe_remake_depfiles =
++CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
++	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
++AM_V_lt = $(am__v_lt_$(V))
++am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
++am__v_lt_0 = --silent
++am__v_lt_1 = 
++LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
++	$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
++	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
++	$(AM_CXXFLAGS) $(CXXFLAGS)
++AM_V_CXX = $(am__v_CXX_$(V))
++am__v_CXX_ = $(am__v_CXX_$(AM_DEFAULT_VERBOSITY))
++am__v_CXX_0 = @echo "  CXX     " $@;
++am__v_CXX_1 = 
++CXXLD = $(CXX)
++CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
++	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \

_service:tar_scm:0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch Added

@@ -0,0 +1,345 @@
+From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
+From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
+Date: Mon, 22 Jan 2024 15:38:24 +0800
+Subject: PATCH AutoBOLT Enable BOLT linker plugin on aarch64 3/3
+
+---
+ Makefile.def     | 10 ++++++++++
+ configure        | 27 ++++++++++++++++++++++++++-
+ configure.ac     | 22 +++++++++++++++++++++-
+ gcc/config.host  |  1 +
+ gcc/config.in    | 13 +++++++++++++
+ gcc/configure    | 10 ++++++++--
+ gcc/configure.ac |  4 ++++
+ gcc/gcc.cc       | 23 +++++++++++++++++++++++
+ 8 files changed, 106 insertions(+), 4 deletions(-)
+
+diff --git a/Makefile.def b/Makefile.def
+index 72d585496..0ba868890 100644
+--- a/Makefile.def
++++ b/Makefile.def
+@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
+ host_modules= { module= lto-plugin; bootstrap=true;
+ 		extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
+ 		extra_make_flags='@extra_linker_plugin_flags@'; };
++host_modules= { module= bolt-plugin; bootstrap=true;
++		extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
++		extra_make_flags='@extra_linker_plugin_flags@'; };
+ host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
+ host_modules= { module= gotools; };
+ host_modules= { module= libctf; bootstrap=true; };
+@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
+ dependencies = { module=configure-gcc; on=all-mpc; };
+ dependencies = { module=configure-gcc; on=all-isl; };
+ dependencies = { module=configure-gcc; on=all-lto-plugin; };
++dependencies = { module=configure-gcc; on=all-bolt-plugin; };
+ dependencies = { module=configure-gcc; on=all-binutils; };
+ dependencies = { module=configure-gcc; on=all-gas; };
+ dependencies = { module=configure-gcc; on=all-ld; };
+@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
+ dependencies = { module=all-gcc; on=all-libiberty; };
+ dependencies = { module=all-gcc; on=all-fixincludes; };
+ dependencies = { module=all-gcc; on=all-lto-plugin; };
++dependencies = { module=all-gcc; on=all-bolt-plugin; };
+ dependencies = { module=all-gcc; on=all-libiconv; };
+ dependencies = { module=info-gcc; on=all-build-libiberty; };
+ dependencies = { module=dvi-gcc; on=all-build-libiberty; };
+@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
+ dependencies = { module=html-gcc; on=all-build-libiberty; };
+ dependencies = { module=install-gcc ; on=install-fixincludes; };
+ dependencies = { module=install-gcc ; on=install-lto-plugin; };
++dependencies = { module=install-gcc ; on=install-bolt-plugin; };
+ dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
+ dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
++dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
+ 
+ dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
+ dependencies = { module=configure-libcpp; on=configure-intl; };
+@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
+ dependencies = { module=all-lto-plugin; on=all-libiberty; };
+ dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
+ 
++dependencies = { module=all-bolt-plugin; on=all-libiberty; };
++dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
++
+ dependencies = { module=configure-libcc1; on=configure-gcc; };
+ dependencies = { module=all-libcc1; on=all-gcc; };
+ 
+diff --git a/configure b/configure
+index 5dcaab14a..aff62c464 100755
+--- a/configure
++++ b/configure
+@@ -826,6 +826,7 @@ with_isl
+ with_isl_include
+ with_isl_lib
+ enable_isl_version_check
++enable_bolt
+ enable_lto
+ enable_linker_plugin_configure_flags
+ enable_linker_plugin_flags
+@@ -1550,6 +1551,7 @@ Optional Features:
+                           enable the PGO build
+   --disable-isl-version-check
+                           disable check for isl version
++  --enable-bolt           enable bolt optimization support
+   --enable-lto            enable link time optimization support
+   --enable-linker-plugin-configure-flags=FLAGS
+                           additional flags for configuring linker plugins
+@@ -8564,6 +8566,15 @@ fi
+ 
+ 
+ 
++# Check for BOLT support.
++# Check whether --enable-bolt was given.
++if test "${enable_bolt+set}" = set; then :
++  enableval=$enable_bolt; enable_bolt=$enableval
++else
++  enable_bolt=no; default_enable_bolt=no
++fi
++
++
+ # Check for LTO support.
+ # Check whether --enable-lto was given.
+ if test "${enable_lto+set}" = set; then :
+@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
+   # ELF platforms build the lto-plugin always.
+   build_lto_plugin=yes
+ 
++  # ELF platforms can build the bolt-plugin.
++  # NOT BUILD BOLT BY DEFAULT.
++  case $target in
++    aarch64*-*-linux*)
++    if test $enable_bolt = yes; then :
++      build_bolt_plugin=yes
++    fi
++    ;;
++  esac
++
+ else
+   if test x"$default_enable_lto" = x"yes" ; then
+     case $target in
+@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
+     fi
+   fi
+ 
++  if test "${build_bolt_plugin}" = "yes" ; then
++      configdirs="$configdirs bolt-plugin"
++  fi
++
+   # If we're building an offloading compiler, add the LTO front end.
+   if test x"$enable_as_accelerator_for" != x ; then
+     case ,${enable_languages}, in
+@@ -9202,7 +9227,7 @@ fi
+ extra_host_libiberty_configure_flags=
+ extra_host_zlib_configure_flags=
+ case " $configdirs " in
+-  *" lto-plugin "* | *" libcc1 "*)
++  *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
+     # When these are to be built as shared libraries, the same applies to
+     # libiberty.
+     extra_host_libiberty_configure_flags=--enable-shared
+diff --git a/configure.ac b/configure.ac
+index 85977482a..f310d75ca 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1863,6 +1863,12 @@ fi
+ AC_SUBST(isllibs)
+ AC_SUBST(islinc)
+ 
++# Check for BOLT support.
++AC_ARG_ENABLE(bolt,
++[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
++enable_bolt=$enableval,
++enable_bolt=no; default_enable_bolt=no)
++
+ # Check for LTO support.
+ AC_ARG_ENABLE(lto,
+ [AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
+@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
+ 
+ ACX_ELF_TARGET_IFELSE(# ELF platforms build the lto-plugin always.
+   build_lto_plugin=yes
++
++  # ELF platforms can build the bolt-plugin.
++  # NOT BUILD BOLT BY DEFAULT.
++  case $target in
++    aarch64*-*-linux*)
++    if test $enable_bolt = yes; then :
++      build_bolt_plugin=yes
++    fi
++    ;;
++  esac
+ ,if test x"$default_enable_lto" = x"yes" ; then
+     case $target in
+       *-apple-darwin912* | *-cygwin* | *-mingw* | *djgpp*) ;;
+@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
+     fi
+   fi
+ 
++  if test "${build_bolt_plugin}" = "yes" ; then
++      configdirs="$configdirs bolt-plugin"
++  fi
++
+   # If we're building an offloading compiler, add the LTO front end.
+   if test x"$enable_as_accelerator_for" != x ; then
+     case ,${enable_languages}, in
+@@ -2457,7 +2477,7 @@ fi
+ extra_host_libiberty_configure_flags=
+ extra_host_zlib_configure_flags=
+ case " $configdirs " in
+-  *" lto-plugin "* | *" libcc1 "*)
++  *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)    
+     # When these are to be built as shared libraries, the same applies to
+     # libiberty.
+     extra_host_libiberty_configure_flags=--enable-shared
+diff --git a/gcc/config.host b/gcc/config.host
+index 4ca300f11..bf7dcb4cc 100644
+--- a/gcc/config.host
++++ b/gcc/config.host
+@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o

_service:tar_scm:0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch Added

@@ -0,0 +1,312 @@
+From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
+From: zhenyu--zhao <zhaozhenyu17@huawei.com>
+Date: Sat, 23 Mar 2024 22:56:09 +0800
+Subject: PATCH AutofdoEnable discrimibator and MCF algorithm on Autofdo
+
+---
+ gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
+ gcc/cfghooks.cc     |   7 ++
+ gcc/opts.cc         |   5 +-
+ gcc/tree-inline.cc  |  14 ++++
+ 4 files changed, 193 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
+index 2b34b80b8..f45f0ec66 100644
+--- a/gcc/auto-profile.cc
++++ b/gcc/auto-profile.cc
+@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
+   if (name == NULL)
+     return -1;
+   string_index_map::const_iterator iter = map_.find (name);
++  /* Function name may be duplicate.  Try to distinguish by the
++     #file_name#function_name defined by the autofdo tool chain.  */
++  if (iter == map_.end ())
++    {
++      char* file_name = get_original_name (lbasename (dump_base_name));
++      char* file_func_name
++	= concat ("#", file_name, "#", name, NULL);
++      iter = map_.find (file_func_name);
++      free (file_name);
++      free (file_func_name);
++    }
+   if (iter == map_.end ())
+     return -1;
+ 
+@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
+ 
+   for (unsigned i = 0; i < num_pos_counts; i++)
+     {
+-      unsigned offset = gcov_read_unsigned () & 0xffff0000;
++      unsigned offset = gcov_read_unsigned ();
+       unsigned num_targets = gcov_read_unsigned ();
+       gcov_type count = gcov_read_counter ();
+       s->pos_counts[offset].count = count;
+@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
+   function_instance *s = get_function_instance_by_inline_stack (stack);
+   if (s == NULL)
+     return false;
++  if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
++    {
++      return true;
++    }
+   return s->get_count_info (stack[0].second, info);
+ }
+ 
+@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
+     }
+ }
+ 
++/* Handle the following scenario, in which afdo_propagate () would
++   invert the branch probability.  E.g.
++   BB_NUM (sample count)
++      BB1 (1000)
++       /    \
++    BB2 (10) BB3 (0)
++      \       /
++	BB4
++   In afdo_propagate (), the count of BB3 is calculated by
++   COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
++   In fact, BB3 may be colder than BB2 by sample count.
++   This function allocates the source BB count to each succ BB by sample
++   rate, E.g.
++   BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT))  */
++
++static void
++afdo_preprocess_bb_count ()
++{
++  basic_block bb;
++  FOR_ALL_BB_FN (bb, cfun)
++    {
++      if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
++	  && bb->count > profile_count::zero ().afdo ())
++	{
++	  basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
++	  basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
++	  if (single_succ_edge (bb1) && single_succ_edge (bb2)
++	      && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
++	    {
++	      gcov_type max_count = 0;
++	      gcov_type total_count = 0;
++	      edge e;
++	      edge_iterator ei;
++	      FOR_EACH_EDGE (e, ei, bb->succs)
++		{
++		  if (!e->dest->count.ipa_p ())
++		    {
++		      continue;
++		    }
++		  max_count = MAX (max_count, e->dest->count.to_gcov_type ());
++		  total_count += e->dest->count.to_gcov_type ();
++		}
++	      /* Only when bb_count > max_count * 2 will the branch
++		 probability be inverted.  */
++	      if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
++		{
++		  FOR_EACH_EDGE (e, ei, bb->succs)
++		    {
++		      gcov_type target_count = bb->count.to_gcov_type ()
++			* e->dest->count.to_gcov_type ()/ total_count;
++		      e->dest->count
++			= profile_count::from_gcov_type
++			  (target_count).afdo ();
++		    }
++		}
++	    }
++	}
++    }
++}
++
+ /* Propagate counts on control flow graph and calculate branch
+    probabilities.  */
+ 
+@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
+     }
+ 
+   afdo_find_equiv_class (annotated_bb);
++  afdo_preprocess_bb_count ();
+   afdo_propagate (annotated_bb);
+ 
+   FOR_EACH_BB_FN (bb, cfun)
+@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
+   return false;
+ }
+ 
++/* Preparation before executing MCF algorithm.  */
++
++static void
++afdo_init_mcf ()
++{
++  basic_block bb;
++  edge e;
++  edge_iterator ei;
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
++    }
++
++  /* Step1: when using mcf, BB ids must be continuous,
++     so we need compact_blocks ().  */
++  compact_blocks ();
++
++  /* Step2: allocate memory for MCF input data.  */
++  bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
++  edge_gcov_counts = new hash_map<edge, gcov_type>;
++
++  /* Step3: init MCF input data from cfg.  */
++  FOR_ALL_BB_FN (bb, cfun)
++    {
++      /* Init BB count for MCF.  */
++      bb_gcov_count (bb) = bb->count.to_gcov_type ();
++
++      gcov_type total_count = 0;
++      FOR_EACH_EDGE (e, ei, bb->succs)
++	{
++	  total_count += e->dest->count.to_gcov_type ();
++	}
++
++      /* If there is no sample in each successor blocks, source
++	 BB samples are allocated to each edge by branch static prob.  */
++
++      FOR_EACH_EDGE (e, ei, bb->succs)
++	{
++	  if (total_count == 0)
++	    {
++	      edge_gcov_count (e) = e->src->count.to_gcov_type ()
++		* e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
++	    }
++	  else
++	    {
++	      edge_gcov_count (e) = e->src->count.to_gcov_type ()
++		* e->dest->count.to_gcov_type () / total_count;
++	    }
++	}
++    }
++}
++
++
++/* Free the resources used by MCF and reset BB count from MCF result.
++   branch probability has been updated in mcf_smooth_cfg ().  */
++
++static void
++afdo_process_after_mcf ()
++{
++  basic_block bb;
++  /* Reset BB count from MCF result.  */
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      if (bb_gcov_count (bb))
++	{

_service:tar_scm:0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch Added

@@ -0,0 +1,194 @@
+From aa39a66f6029fe16a656d7c6339908b953fb1e04 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Feb 2024 11:27:43 +0300
+Subject: [PATCH 01/18] Add insn defs and correct costs for cmlt generation
+
+---
+ gcc/config/aarch64/aarch64-simd.md  | 48 +++++++++++++++++++++++++++++
+ gcc/config/aarch64/aarch64.cc       | 15 +++++++++
+ gcc/config/aarch64/aarch64.opt      |  4 +++
+ gcc/config/aarch64/iterators.md     |  3 +-
+ gcc/config/aarch64/predicates.md    | 25 +++++++++++++++
+ gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++
+ 6 files changed, 114 insertions(+), 1 deletion(-)
+ create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index ee7f0b89c..82f73805f 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -6454,6 +6454,54 @@
+   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
+ )
+ 
++;; Use cmlt to replace vector arithmetic operations like this (SImode example):
++;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
++;; TODO: maybe extend to scalar operations or other cm** instructions.
++
++(define_insn "*aarch64_cmlt_as_arith<mode>"
++  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
++	(minus:<V_INT_EQUIV>
++	  (ashift:<V_INT_EQUIV>
++	    (and:<V_INT_EQUIV>
++	      (lshiftrt:<V_INT_EQUIV>
++		(match_operand:VDQHSD 1 "register_operand" "w")
++		(match_operand:VDQHSD 2 "half_size_minus_one_operand"))
++	      (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
++	    (match_operand:VDQHSD 4 "half_size_operand"))
++	  (and:<V_INT_EQUIV>
++	    (lshiftrt:<V_INT_EQUIV>
++	      (match_dup 1)
++	      (match_dup 2))
++	    (match_dup 3))))]
++  "TARGET_SIMD && flag_cmlt_arith"
++  "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
++  [(set_attr "type" "neon_compare_zero")]
++)
++
++;; The helper definition that allows combiner to use the previous pattern.
++
++(define_insn_and_split "*arch64_cmlt_tmp<mode>"
++  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
++	(and:<V_INT_EQUIV>
++	  (lshiftrt:<V_INT_EQUIV>
++	    (match_operand:VDQHSD 1 "register_operand" "w")
++	    (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
++	  (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
++  "TARGET_SIMD && flag_cmlt_arith"
++  "#"
++  "&& reload_completed"
++  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
++	(lshiftrt:<V_INT_EQUIV>
++	  (match_operand:VDQHSD 1 "register_operand")
++	  (match_operand:VDQHSD 2 "half_size_minus_one_operand")))
++   (set (match_dup 0)
++	(and:<V_INT_EQUIV>
++	  (match_dup 0)
++	  (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
++  ""
++  [(set_attr "type" "neon_compare_zero")]
++)
++
+ (define_insn_and_split "aarch64_cm<optab>di"
+   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
+ 	(neg:DI
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a3da4ca30..04072ca25 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -14064,6 +14064,21 @@ cost_minus:
+ 	    return true;
+ 	  }
+ 
++	/* Detect aarch64_cmlt_as_arith instruction. Now only this pattern
++	   matches the condition. The costs of cmlt and sub instructions
++	   are comparable, so we are not increasing the cost here.  */
++	if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT
++	    && GET_CODE (op1) == AND)
++	  {
++	    rtx op0_subop0 = XEXP (op0, 0);
++	    if (rtx_equal_p (op0_subop0, op1))
++	      {
++		rtx lshrt_op = XEXP (op0_subop0, 0);
++		if (GET_CODE (lshrt_op) == LSHIFTRT)
++		  return true;
++	      }
++	  }
++
+ 	/* Look for SUB (extended register).  */
+ 	if (is_a <scalar_int_mode> (mode)
+ 	    && aarch64_rtx_arith_op_extract_p (op1))
+diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
+index a64b927e9..101664c7c 100644
+--- a/gcc/config/aarch64/aarch64.opt
++++ b/gcc/config/aarch64/aarch64.opt
+@@ -262,6 +262,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0.
+ This option is for use with fstack-protector-strong and not for use in
+ user-land code.
+ 
++mcmlt-arith
++Target Var(flag_cmlt_arith) Optimization Init(0)
++Use SIMD cmlt instruction to perform some arithmetic/logic calculations.
++
+ TargetVariable
+ long aarch64_stack_protector_guard_offset = 0
+ 
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index 26a840d7f..967e6b0b1 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -1485,7 +1485,8 @@
+ 			  (V2DI "2s")])
+ 
+ ;; Register suffix narrowed modes for VQN.
+-(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
++(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h")
++			   (V8HI "16b") (V4SI "8h")
+ 			   (V2DI "4s")])
+ 
+ ;; Widened modes of vector modes.
+diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
+index c308015ac..07c14aacb 100644
+--- a/gcc/config/aarch64/predicates.md
++++ b/gcc/config/aarch64/predicates.md
+@@ -49,6 +49,31 @@
+   return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3);
+ })
+ 
++(define_predicate "half_size_minus_one_operand"
++  (match_code "const_vector")
++{
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
++  return CONST_INT_P (op) && (UINTVAL (op) == size - 1);
++})
++
++(define_predicate "half_size_operand"
++  (match_code "const_vector")
++{
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
++  return CONST_INT_P (op) && (UINTVAL (op) == size);
++})
++
++(define_predicate "cmlt_arith_mask_operand"
++  (match_code "const_vector")
++{
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
++  unsigned long long mask = ((unsigned long long) 1 << size) | 1;
++  return CONST_INT_P (op) && (UINTVAL (op) == mask);
++})
++
+ (define_predicate "subreg_lowpart_operator"
+   (ior (match_code "truncate")
+        (and (match_code "subreg")
+diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c
+new file mode 100755
+index 000000000..b4c9a37ff
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/combine-cmlt.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-options "-O3 -mcmlt-arith" } */
++
++/* The test checks usage of cmlt insns for arithmetic/logic calculations
++ * in foo ().  It's inspired by sources of x264 codec.  */
++
++typedef unsigned short int uint16_t;
++typedef unsigned int uint32_t;
++
++void foo( uint32_t *a, uint32_t *b)
++{
++  for (unsigned i = 0; i < 4; i++)
++    {
++      uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
++		    &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
++      b[i] = (a[i]+s)^s;
++    }
++}
++
++/* { dg-final { scan-assembler-times {cmlt\t} 1 } }  */
+-- 
+2.33.0
+

_service:tar_scm:0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch Added

@@ -0,0 +1,560 @@
+From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
+From: vchernon <chernonog.vyacheslav@huawei.com>
+Date: Wed, 28 Feb 2024 23:05:12 +0800
+Subject: PATCH 02/18 rtl-ifcvt introduce rtl ifcvt enchancements     new
+ option:       -fifcvt-allow-complicated-cmps:         allows ifcvt to deal
+ with complicated cmps like
+
+        cmp reg1 (reg2 + reg3)
+
+        can increase compilation time
+    new param:
+      -param=ifcvt-allow-register-renaming=0,1,2
+        1 : allows ifcvt to rename registers in then and else bb
+        2 : allows to rename registers in condition and else/then bb
+        can increase compilation time and register pressure
+---
+ gcc/common.opt                                |   4 +
+ gcc/ifcvt.cc                                  | 291 +++++++++++++++---
+ gcc/params.opt                                |   4 +
+ .../gcc.c-torture/execute/ifcvt-renaming-1.c  |  35 +++
+ gcc/testsuite/gcc.dg/ifcvt-6.c                |  27 ++
+ 5 files changed, 311 insertions(+), 50 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index c7c6bc256..aa00fb7b0 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3691,4 +3691,8 @@ fipa-ra
+ Common Var(flag_ipa_ra) Optimization
+ Use caller save register across calls if possible.
+ 
++fifcvt-allow-complicated-cmps
++Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
++Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
++
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
+index 2c1eba312..584db7b55 100644
+--- a/gcc/ifcvt.cc
++++ b/gcc/ifcvt.cc
+@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
+     }
+ 
+   /* Don't even try if the comparison operands or the mode of X are weird.  */
+-  if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
++  if (!flag_ifcvt_allow_complicated_cmps
++      && (cond_complex
++	  || !SCALAR_INT_MODE_P (GET_MODE (x))))
+     return NULL_RTX;
+ 
+   return emit_store_flag (x, code, XEXP (cond, 0),
+@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
+   /* Currently support only simple single sets in test_bb.  */
+   if (!sset
+       || !noce_operand_ok (SET_DEST (sset))
+-      || contains_ccmode_rtx_p (SET_DEST (sset))
++      || (!flag_ifcvt_allow_complicated_cmps
++	  && contains_ccmode_rtx_p (SET_DEST (sset)))
+       || !noce_operand_ok (SET_SRC (sset)))
+     return false;
+ 
+@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
+    in this function.  */
+ 
+ static bool
+-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
++bbs_ok_for_cmove_arith (basic_block bb_a,
++			basic_block bb_b,
++			rtx to_rename,
++			bitmap conflict_regs)
+ {
+   rtx_insn *a_insn;
+   bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
+-
++  bitmap intersections = BITMAP_ALLOC (&reg_obstack);
+   df_ref def;
+   df_ref use;
++  rtx_insn *last_a = last_active_insn (bb_a, FALSE);
+ 
+   FOR_BB_INSNS (bb_a, a_insn)
+     {
+@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+       rtx sset_a = single_set (a_insn);
+ 
+       if (!sset_a)
+-	{
+-	  BITMAP_FREE (bba_sets);
+-	  return false;
+-	}
++	goto end_cmove_arith_check_and_fail;
+       /* Record all registers that BB_A sets.  */
+       FOR_EACH_INSN_DEF (def, a_insn)
+-	if (!(to_rename && DF_REF_REG (def) == to_rename))
++	if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
+ 	  bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
+     }
+ 
++  bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
+   rtx_insn *b_insn;
+-
+   FOR_BB_INSNS (bb_b, b_insn)
+     {
+       if (!active_insn_p (b_insn))
+@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+       rtx sset_b = single_set (b_insn);
+ 
+       if (!sset_b)
+-	{
+-	  BITMAP_FREE (bba_sets);
+-	  return false;
+-	}
++	goto end_cmove_arith_check_and_fail;
+ 
+       /* Make sure this is a REG and not some instance
+ 	 of ZERO_EXTRACT or SUBREG or other dangerous stuff.
+@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+       if (MEM_P (SET_DEST (sset_b)))
+ 	gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
+       else if (!REG_P (SET_DEST (sset_b)))
+-	{
+-	  BITMAP_FREE (bba_sets);
+-	  return false;
+-	}
++	goto end_cmove_arith_check_and_fail;
+ 
+-      /* If the insn uses a reg set in BB_A return false.  */
++      /* If the insn uses a reg set in BB_A return false
++	 or try to collect register list for renaming.  */
+       FOR_EACH_INSN_USE (use, b_insn)
+ 	{
+-	  if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
++	  if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
+ 	    {
+-	      BITMAP_FREE (bba_sets);
+-	      return false;
++	      if (param_ifcvt_allow_register_renaming < 1)
++		  goto end_cmove_arith_check_and_fail;
++
++	      /* Those regs should be renamed.  We can't rename CC reg, but
++		 possibly we can provide combined comparison in the future.  */
++	      if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
++		goto end_cmove_arith_check_and_fail;
++	      bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
+ 	    }
+ 	}
+-
+     }
+ 
+   BITMAP_FREE (bba_sets);
++  BITMAP_FREE (intersections);
+   return true;
++
++end_cmove_arith_check_and_fail:
++  BITMAP_FREE (bba_sets);
++  BITMAP_FREE (intersections);
++  return false;
+ }
+ 
+ /* Emit copies of all the active instructions in BB except the last.
+@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
+   return true;
+ }
+ 
++/* This function tries to rename regs that intersect with considered bb
++   inside condition expression.  Condition expression will be moved down
++   if the optimization will be applied, so it is essential to be sure that
++   all intersected registers will be renamed otherwise transformation
++   can't be applied.  Function returns true if renaming was successful
++   and optimization can proceed further.  */
++
++static bool
++noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
++{
++  bool success = true;
++  if (bitmap_empty_p (cond_rename_regs))
++    return true;
++  if (param_ifcvt_allow_register_renaming < 2)
++    return false;
++  df_ref use;
++  rtx_insn *cmp_insn = if_info->cond_earliest;
++  /* A jump instruction as the condition is currently unsupported.  */
++  if (JUMP_P (cmp_insn))
++    return false;
++  rtx_insn *before_cmp = PREV_INSN (cmp_insn);
++  start_sequence ();
++  rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
++  basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
++  FOR_EACH_INSN_USE (use, cmp_insn)
++    {
++      if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
++	{
++	  rtx use_reg = DF_REF_REG (use);
++	  rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
++	  if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
++	    {
++	      end_sequence ();
++	      return false;

_service:tar_scm:0037-Perform-early-if-conversion-of-simple-arithmetic.patch Added

@@ -0,0 +1,109 @@
+From 310eade1450995b55d9f8120561022fbf164b2ec Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Thu, 12 Jan 2023 14:52:49 +0300
+Subject: [PATCH 03/18] Perform early if-conversion of simple arithmetic
+
+---
+ gcc/common.opt                      |  4 ++++
+ gcc/match.pd                        | 25 +++++++++++++++++++
+ gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++
+ 3 files changed, 66 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index aa00fb7b0..dac477c04 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1821,6 +1821,10 @@ fif-conversion2
+ Common Var(flag_if_conversion2) Optimization
+ Perform conversion of conditional jumps to conditional execution.
+ 
++fif-conversion-gimple
++Common Var(flag_if_conversion_gimple) Optimization
++Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
++
+ fstack-reuse=
+ Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
+ -fstack-reuse=all|named_vars|none	Set stack reuse level for local variables.
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 6f24d5079..3cbaf2a5b 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4278,6 +4278,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+   )
+  )
+ )
++
++(if (flag_if_conversion_gimple)
++ (for simple_op (plus minus bit_and bit_ior bit_xor)
++  (simplify
++   (cond @0 (simple_op @1 INTEGER_CST@2) @1)
++   (switch
++    /* a = cond ? a + 1 : a -> a = a + ((int) cond) */
++    (if (integer_onep (@2))
++     (simple_op @1 (convert (convert:boolean_type_node @0))))
++    /* a = cond ? a + powerof2cst : a ->
++       a = a + ((int) cond) << log2 (powerof2cst) */
++    (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
++     (with
++      {
++	tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
++      }
++      (simple_op @1 (lshift (convert (convert:boolean_type_node @0))
++			    { shift; })
++      )
++     )
++    )
++   )
++  )
++ )
++)
+ #endif
+ 
+ #if GIMPLE
+diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
+new file mode 100644
+index 000000000..0f7c87e5c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
+@@ -0,0 +1,37 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
++
++int test_int (int optimizable_int) {
++    if (optimizable_int > 5)
++	++optimizable_int;
++    return optimizable_int;
++}
++
++int test_int_pow2 (int optimizable_int_pow2) {
++    if (optimizable_int_pow2 <= 4)
++	optimizable_int_pow2 += 1024;
++    return optimizable_int_pow2;
++}
++
++int test_int_non_pow2 (int not_optimizable_int_non_pow2) {
++    if (not_optimizable_int_non_pow2 == 1)
++	not_optimizable_int_non_pow2 += 513;
++    return not_optimizable_int_non_pow2;
++}
++
++float test_float (float not_optimizable_float) {
++    if (not_optimizable_float > 5)
++	not_optimizable_float += 1;
++    return not_optimizable_float;
++}
++
++/* Expecting if-else block in test_float and test_int_non_pow2 only. */
++/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */
++/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */
++/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */
++/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */
++
++/* Expecting shifted result only for optimizable_int_pow2. */
++/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */
++/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */
+-- 
+2.33.0
+

_service:tar_scm:0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch Added

@@ -0,0 +1,252 @@
+From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Tue, 24 Jan 2023 16:43:40 +0300
+Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
+ ops too.
+
+---
+ gcc/common.opt                 |   5 ++
+ gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
+ gcc/tree-ssa-math-opts.cc      |  43 ++++++++--
+ 3 files changed, 184 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index dac477c04..39c90604e 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3106,6 +3106,11 @@ freciprocal-math
+ Common Var(flag_reciprocal_math) SetByCombined Optimization
+ Same as -fassociative-math for expressions which include division.
+ 
++fuaddsub-overflow-match-all
++Common Var(flag_uaddsub_overflow_match_all)
++Match unsigned add/sub overflow even if the target does not support
++the corresponding instruction.
++
+ ; Nonzero means that unsafe floating-point math optimizations are allowed
+ ; for the sake of speed.  IEEE compliance is not guaranteed, and operations
+ ; are allowed to assume that their arguments and results are "normal"
+diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
+new file mode 100644
+index 000000000..96c26d308
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/uaddsub.c
+@@ -0,0 +1,143 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
++#include <stdint.h>
++
++typedef unsigned __int128 uint128_t;
++typedef struct uint256_t
++{
++  uint128_t lo;
++  uint128_t hi;
++} uint256_t;
++
++uint16_t add16 (uint8_t a, uint8_t b)
++{
++  uint8_t tmp = a + b;
++  uint8_t overflow = 0;
++  if (tmp < a)
++    overflow = 1;
++
++  uint16_t res = overflow;
++  res <<= 8;
++  res += tmp;
++  return res;
++}
++
++uint32_t add32 (uint16_t a, uint16_t b)
++{
++  uint16_t tmp = a + b;
++  uint16_t overflow = 0;
++  if (tmp < a)
++    overflow = 1;
++
++  uint32_t res = overflow;
++  res <<= 16;
++  res += tmp;
++  return res;
++}
++
++uint64_t add64 (uint32_t a, uint32_t b)
++{
++  uint32_t tmp = a + b;
++  uint32_t overflow = 0;
++  if (tmp < a)
++    overflow = 1;
++
++  uint64_t res = overflow;
++  res <<= 32;
++  res += tmp;
++  return res;
++}
++
++uint128_t add128 (uint64_t a, uint64_t b)
++{
++  uint64_t tmp = a + b;
++  uint64_t overflow = 0;
++  if (tmp < a)
++    overflow = 1;
++
++  uint128_t res = overflow;
++  res <<= 64;
++  res += tmp;
++  return res;
++}
++
++uint256_t add256 (uint128_t a, uint128_t b)
++{
++  uint128_t tmp = a + b;
++  uint128_t overflow = 0;
++  if (tmp < a)
++    overflow = 1;
++
++  uint256_t res;
++  res.hi = overflow;
++  res.lo = tmp;
++  return res;
++}
++
++uint16_t sub16 (uint8_t a, uint8_t b)
++{
++  uint8_t tmp = a - b;
++  uint8_t overflow = 0;
++  if (tmp > a)
++    overflow = -1;
++
++  uint16_t res = overflow;
++  res <<= 8;
++  res += tmp;
++  return res;
++}
++
++uint32_t sub32 (uint16_t a, uint16_t b)
++{
++  uint16_t tmp = a - b;
++  uint16_t overflow = 0;
++  if (tmp > a)
++    overflow = -1;
++
++  uint32_t res = overflow;
++  res <<= 16;
++  res += tmp;
++  return res;
++}
++
++uint64_t sub64 (uint32_t a, uint32_t b)
++{
++  uint32_t tmp = a - b;
++  uint32_t overflow = 0;
++  if (tmp > a)
++    overflow = -1;
++
++  uint64_t res = overflow;
++  res <<= 32;
++  res += tmp;
++  return res;
++}
++
++uint128_t sub128 (uint64_t a, uint64_t b)
++{
++  uint64_t tmp = a - b;
++  uint64_t overflow = 0;
++  if (tmp > a)
++    overflow = -1;
++
++  uint128_t res = overflow;
++  res <<= 64;
++  res += tmp;
++  return res;
++}
++
++uint256_t sub256 (uint128_t a, uint128_t b)
++{
++  uint128_t tmp = a - b;
++  uint128_t overflow = 0;
++  if (tmp > a)
++    overflow = -1;
++
++  uint256_t res;
++  res.hi = overflow;
++  res.lo = tmp;
++  return res;
++}
++
++/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
+index 232e903b0..55d6ee8ae 100644
+--- a/gcc/tree-ssa-math-opts.cc
++++ b/gcc/tree-ssa-math-opts.cc
+@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
+     }
+ }
+ 
++/* Check if the corresponding operation has wider equivalent on the target.  */
++
++static bool
++wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
++{
++  machine_mode wider_mode;
++  FOR_EACH_WIDER_MODE (wider_mode, mode)
++    {
++      machine_mode next_mode;
++      if (optab_handler (op, wider_mode) != CODE_FOR_nothing
++	  || (op == smul_optab
++	      && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
++	      && (find_widening_optab_handler ((unsignedp

_service:tar_scm:0039-Match-double-sized-mul-pattern.patch Added

@@ -0,0 +1,488 @@
+From e7b22f97f960b62e555dfd6f2e3ae43973fcbb3e Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Wed, 25 Jan 2023 15:04:07 +0300
+Subject: [PATCH 05/18] Match double sized mul pattern
+
+---
+ gcc/match.pd                              | 136 +++++++++++++++++++++
+ gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++
+ gcc/testsuite/gcc.dg/double_sized_mul-2.c |  62 ++++++++++
+ gcc/tree-ssa-math-opts.cc                 |  80 ++++++++++++
+ 4 files changed, 419 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 3cbaf2a5b..61866cb90 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -7895,3 +7895,139 @@ and,
+ 	       == TYPE_UNSIGNED (TREE_TYPE (@3))))
+        && single_use (@4)
+        && single_use (@5))))
++
++/* Match multiplication with double sized result.
++
++   Consider the following calculations:
++   arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo)
++	       * (2^(bit_size/2) * arg1_hi + arg1_lo)
++   arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
++	       + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi)
++	       + arg0_lo * arg1_lo
++
++   The products of high and low parts fit in bit_size values, thus they are
++   placed in high and low parts of result respectively.
++
++   The sum of the mixed products may overflow, so we need a detection for that.
++   Also it has a bit_size/2 offset, thus it intersects with both high and low
++   parts of result.  Overflow detection constant is bit_size/2 due to this.
++
++   With this info:
++   arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
++	       + 2^(bit_size/2) * middle
++	       + 2^bit_size * possible_middle_overflow
++	       + arg0_lo * arg1_lo
++   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow)
++	       + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo)
++	       + arg0_lo * arg1_lo
++   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi
++	       +	       possible_middle_overflow)
++	       + 2^(bit_size/2) * middle_lo
++	       + arg0_lo * arg1_lo
++
++   The last sum can produce overflow for the high result part.  With this:
++   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow
++	       +	       possible_res_lo_overflow + middle_hi)
++	       + res_lo
++	       = res_hi + res_lo
++
++   This formula is quite big to fit into one match pattern with all of the
++   combinations of terms inside it.  There are many helpers for better code
++   readability.
++
++   The simplification is based on res_hi: computing res_lo alone is assumed
++   not to be a practical use case for such calculations.
++
++   Overflow handling is done via matching complex calculations:
++   the realpart and imagpart are quite handy here.  */
++/* Match low and high parts of the argument.  */
++(match (double_size_mul_arg_lo @0 @1)
++ (bit_and @0 INTEGER_CST@1)
++  (if (wi::to_wide (@1)
++       == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type)))))
++(match (double_size_mul_arg_hi @0 @1)
++ (rshift @0 INTEGER_CST@1)
++  (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2)))
++
++/* Match various argument parts products.  */
++(match (double_size_mul_lolo @0 @1)
++ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3))
++  (if (single_use (@4))))
++(match (double_size_mul_hihi @0 @1)
++ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3))
++  (if (single_use (@4))))
++(match (double_size_mul_lohi @0 @1)
++ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3))
++  (if (single_use (@4))))
++
++/* Match complex middle sum.  */
++(match (double_size_mul_middle_complex @0 @1)
++ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0))
++  (if (num_imm_uses (@2) == 2)))
++
++/* Match real middle results.  */
++(match (double_size_mul_middle @0 @1)
++ (realpart@2 (double_size_mul_middle_complex @0 @1))
++  (if (num_imm_uses (@2) == 2)))
++(match (double_size_mul_middleres_lo @0 @1)
++ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
++  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
++       && single_use (@3))))
++(match (double_size_mul_middleres_hi @0 @1)
++ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
++  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
++       && single_use (@3))))
++
++/* Match low result part.  */
++/* Number of uses may be < 2 in case when we are interested in
++   high part only.  */
++(match (double_size_mul_res_lo_complex @0 @1)
++ (IFN_ADD_OVERFLOW:c@2
++  (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1))
++  (if (num_imm_uses (@2) <= 2)))
++(match (double_size_mul_res_lo @0 @1)
++ (realpart (double_size_mul_res_lo_complex @0 @1)))
++
++/* Match overflow terms.  */
++(match (double_size_mul_overflow_check_lo @0 @1 @5)
++ (convert@4 (ne@3
++  (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop))
++  (if (single_use (@2) && single_use (@3) && single_use (@4))))
++(match (double_size_mul_overflow_check_hi @0 @1)
++ (lshift@6 (convert@5 (ne@4
++  (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop))
++	   INTEGER_CST@2)
++  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
++       && single_use (@3) && single_use (@4) && single_use (@5)
++       && single_use (@6))))
++
++/* Match all possible permutations for high result part calculations.  */
++(for op1 (double_size_mul_hihi
++	  double_size_mul_overflow_check_hi
++	  double_size_mul_middleres_hi)
++     op2 (double_size_mul_overflow_check_hi
++	  double_size_mul_middleres_hi
++	  double_size_mul_hihi)
++     op3 (double_size_mul_middleres_hi
++	  double_size_mul_hihi
++	  double_size_mul_overflow_check_hi)
++ (match (double_size_mul_candidate @0 @1 @2 @3)
++  (plus:c@2
++   (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1))
++   (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))
++    (if (single_use (@4) && single_use (@5))))
++ (match (double_size_mul_candidate @0 @1 @2 @3)
++  (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3)
++   (plus:c@4 (op1:c @0 @1)
++    (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
++     (if (single_use (@4) && single_use (@5))))
++ (match (double_size_mul_candidate @0 @1 @2 @3)
++  (plus:c@2 (op1:c @0 @1)
++   (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3)
++    (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
++     (if (single_use (@4) && single_use (@5))))
++ (match (double_size_mul_candidate @0 @1 @2 @3)
++  (plus:c@2 (op1:c @0 @1)
++   (plus:c@4 (op2:c @0 @1)
++    (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
++     (if (single_use (@4) && single_use (@5)))))
+diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+new file mode 100644
+index 000000000..4d475cc8a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+@@ -0,0 +1,141 @@
++/* { dg-do compile } */
++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
++   proper overflow detection in some cases.  */
++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++#include <stdint.h>
++
++typedef unsigned __int128 uint128_t;
++
++uint16_t mul16 (uint8_t a, uint8_t b)
++{
++  uint8_t a_lo = a & 0xF;
++  uint8_t b_lo = b & 0xF;
++  uint8_t a_hi = a >> 4;
++  uint8_t b_hi = b >> 4;
++  uint8_t lolo = a_lo * b_lo;
++  uint8_t lohi = a_lo * b_hi;
++  uint8_t hilo = a_hi * b_lo;
++  uint8_t hihi = a_hi * b_hi;
++  uint8_t middle = hilo + lohi;
++  uint8_t middle_hi = middle >> 4;
++  uint8_t middle_lo = middle << 4;
++  uint8_t res_lo = lolo + middle_lo;
++  uint8_t res_hi = hihi + middle_hi;
++  res_hi += (res_lo < middle_lo ? 1 : 0);
++  res_hi += (middle < hilo ? 0x10 : 0);
++  uint16_t res = ((uint16_t) res_hi) << 8;
++  res += res_lo;
++  return res;
++}
++
++uint32_t mul32 (uint16_t a, uint16_t b)
++{
++  uint16_t a_lo = a & 0xFF;
++  uint16_t b_lo = b & 0xFF;
++  uint16_t a_hi = a >> 8;

_service:tar_scm:0040-Port-icp-patch-to-GCC-12.patch Added

@@ -0,0 +1,2387 @@
+From b73462757734c62f64e7a4379340679ec6f19669 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Tue, 27 Feb 2024 07:28:12 +0800
+Subject: [PATCH 06/18] Port icp patch to GCC 12
+
+---
+ gcc/common.opt              |    8 +
+ gcc/dbgcnt.def              |    1 +
+ gcc/ipa-devirt.cc           | 1855 +++++++++++++++++++++++++++++++++++
+ gcc/passes.def              |    1 +
+ gcc/testsuite/gcc.dg/icp1.c |   40 +
+ gcc/testsuite/gcc.dg/icp2.c |   38 +
+ gcc/testsuite/gcc.dg/icp3.c |   52 +
+ gcc/testsuite/gcc.dg/icp4.c |   55 ++
+ gcc/testsuite/gcc.dg/icp5.c |   66 ++
+ gcc/testsuite/gcc.dg/icp6.c |   66 ++
+ gcc/testsuite/gcc.dg/icp7.c |   48 +
+ gcc/timevar.def             |    1 +
+ gcc/tree-pass.h             |    1 +
+ 13 files changed, 2232 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/icp1.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp2.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp3.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp4.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp5.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp6.c
+ create mode 100644 gcc/testsuite/gcc.dg/icp7.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 39c90604e..16aadccf6 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1316,6 +1316,14 @@ fdevirtualize
+ Common Var(flag_devirtualize) Optimization
+ Try to convert virtual calls to direct ones.
+ 
++ficp
++Common Var(flag_icp) Optimization Init(0)
++Try to promote indirect calls to direct ones.
++
++ficp-speculatively
++Common Var(flag_icp_speculatively) Optimization
++Promote indirect calls speculatively.
++
+ fdiagnostics-show-location=
+ Common Joined RejectNegative Enum(diagnostic_prefixing_rule)
+ -fdiagnostics-show-location=once|every-line	How often to emit source location at the beginning of line-wrapped diagnostics.
+diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
+index 3aa18cd0c..a00bbc31b 100644
+--- a/gcc/dbgcnt.def
++++ b/gcc/dbgcnt.def
+@@ -170,6 +170,7 @@ DEBUG_COUNTER (graphite_scop)
+ DEBUG_COUNTER (hoist)
+ DEBUG_COUNTER (hoist_insn)
+ DEBUG_COUNTER (ia64_sched2)
++DEBUG_COUNTER (icp)
+ DEBUG_COUNTER (if_after_combine)
+ DEBUG_COUNTER (if_after_reload)
+ DEBUG_COUNTER (if_conversion)
+diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
+index 74fe65608..383839189 100644
+--- a/gcc/ipa-devirt.cc
++++ b/gcc/ipa-devirt.cc
+@@ -103,9 +103,14 @@ along with GCC; see the file COPYING3.  If not see
+   indirect polymorphic edge all possible polymorphic call targets of the call.
+ 
+   pass_ipa_devirt performs simple speculative devirtualization.
++  pass_ipa_icp performs simple indirect call promotion.
+ */
+ 
+ #include "config.h"
++#define INCLUDE_ALGORITHM
++#define INCLUDE_SET
++#define INCLUDE_MAP
++#define INCLUDE_LIST
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -127,6 +132,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "ipa-fnsummary.h"
+ #include "demangle.h"
+ #include "dbgcnt.h"
++#include "gimple-iterator.h"
+ #include "gimple-pretty-print.h"
+ #include "intl.h"
+ #include "stringpool.h"
+@@ -4401,5 +4407,1854 @@ make_pass_ipa_odr (gcc::context *ctxt)
+   return new pass_ipa_odr (ctxt);
+ }
+ 
++/* Function signature map used to look up function decl which corresponds to
++   the given function type.  */
++typedef std::set<unsigned> type_set;
++typedef std::set<tree> decl_set;
++typedef std::map<unsigned, type_set*> type_alias_map;
++typedef std::map<unsigned, decl_set*> type_decl_map;
++typedef std::map<unsigned, tree> uid_to_type_map;
++typedef std::map<tree, tree> type_map;
++
++static bool has_address_taken_functions_with_varargs = false;
++static type_set *unsafe_types = NULL;
++static type_alias_map *fta_map = NULL;
++static type_alias_map *ta_map = NULL;
++static type_map *ctype_map = NULL;
++static type_alias_map *cbase_to_ptype = NULL;
++static type_decl_map *fs_map = NULL;
++static uid_to_type_map *type_uid_map = NULL;
++
++static void
++print_type_set(unsigned ftype_uid, type_alias_map *map)
++{
++  if (!map->count (ftype_uid))
++    return;
++  type_set* s = (*map)[ftype_uid];
++  for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
++    fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
++}
++
++static void
++dump_type_with_uid (const char *msg, tree type, dump_flags_t flags = TDF_NONE)
++{
++  fprintf (dump_file, msg);
++  print_generic_expr (dump_file, type, flags);
++  fprintf (dump_file, " (%d)\n", TYPE_UID (type));
++}
++
++/* Walk aggregate type and collect types of scalar elements.  */
++
++static void
++collect_scalar_types (tree tp, std::list<tree> &types)
++{
++  /* TODO: take into account different field offsets.
++     Also support array casts.  */
++  if (tp && dump_file && (dump_flags & TDF_DETAILS))
++    dump_type_with_uid ("Walk var's type: ", tp, TDF_UID);
++  if (RECORD_OR_UNION_TYPE_P (tp))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Record's fields {\n");
++      for (tree field = TYPE_FIELDS (tp); field;
++	   field = DECL_CHAIN (field))
++	{
++	  if (TREE_CODE (field) != FIELD_DECL)
++	    continue;
++	  collect_scalar_types (TREE_TYPE (field), types);
++	}
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "}\n");
++      return;
++    }
++  if (TREE_CODE (tp) == ARRAY_TYPE)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Array's innermost type:\n");
++      /* Take the innermost component type.  */
++      tree elt;
++      for (elt = TREE_TYPE (tp); TREE_CODE (elt) == ARRAY_TYPE;
++	   elt = TREE_TYPE (elt))
++	if (dump_file && (dump_flags & TDF_DETAILS))
++	  print_generic_expr (dump_file, elt);
++      collect_scalar_types (elt, types);
++      return;
++    }
++  types.push_back (tp);
++}
++
++static void maybe_register_aliases (tree type1, tree type2);
++
++/* Walk type lists and maybe register type aliases.  */
++
++static void
++compare_type_lists (std::list<tree> tlist1, std::list<tree> tlist2)
++{
++  for (std::list<tree>::iterator ti1 = tlist1.begin (), ti2 = tlist2.begin ();
++       ti1 != tlist1.end (); ++ti1, ++ti2)
++    {
++      /* TODO: correct the analysis results if lists have different length.  */
++      if (ti2 == tlist2.end ())
++	{
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    fprintf (dump_file, "Type lists with different length!\n");
++	  break;
++	}
++      maybe_register_aliases (*ti1, *ti2);
++    }
++}
++
++/* For two given types collect scalar element types and
++   compare the result lists to find type aliases.  */
++
++static void
++collect_scalar_types_and_find_aliases (tree t1, tree t2)
++{
++  std::list<tree> tlist1;
++  std::list<tree> tlist2;
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "First type list: ");
++  collect_scalar_types (t1, tlist1);
++  if (dump_file && (dump_flags & TDF_DETAILS))

_service:tar_scm:0041-Port-fixes-in-icp-to-GCC-12.patch Added

@@ -0,0 +1,100 @@
+From aaa117a9ff58fb208e8c8859e075ca425f995f63 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Tue, 27 Feb 2024 07:43:57 +0800
+Subject: [PATCH 07/18] Port fixes in icp to GCC 12
+
+---
+ gcc/ipa-devirt.cc | 37 ++++++++++++++++++++++++++++++-------
+ 1 file changed, 30 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
+index 383839189..318535d06 100644
+--- a/gcc/ipa-devirt.cc
++++ b/gcc/ipa-devirt.cc
+@@ -4431,6 +4431,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
+   if (!map->count (ftype_uid))
+     return;
+   type_set* s = (*map)[ftype_uid];
++  if (!s)
++    {
++      fprintf (dump_file, "%d (no set)", ftype_uid);
++      return;
++    }
+   for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
+     fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
+ }
+@@ -4696,12 +4701,19 @@ maybe_register_aliases (tree type1, tree type2)
+       if (register_ailas_type (type1, type2, ta_map))
+ 	analyze_pointees (type1, type2);
+     }
++  unsigned type1_uid = TYPE_UID (type1);
++  unsigned type2_uid = TYPE_UID (type2);
++  if (type_uid_map->count (type1_uid) == 0)
++    (*type_uid_map)[type1_uid] = type1;
++  if (type_uid_map->count (type2_uid) == 0)
++    (*type_uid_map)[type2_uid] = type2;
++
+   /* If function and non-function type pointers alias,
+      the function type is unsafe.  */
+   if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
+-    unsafe_types->insert (TYPE_UID (type1));
++    unsafe_types->insert (type1_uid);
+   if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
+-    unsafe_types->insert (TYPE_UID (type2));
++    unsafe_types->insert (type2_uid);
+ 
+   /* Try to figure out with pointers to incomplete types.  */
+   if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
+@@ -4825,10 +4837,12 @@ compare_block_and_init_type (tree block, tree t1)
+ static void
+ analyze_global_var (varpool_node *var)
+ {
+-  var->get_constructor();
+   tree decl = var->decl;
+-  if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
+-      || integer_zerop (DECL_INITIAL (decl)))
++  if (decl || !DECL_INITIAL (decl))
++    return;
++  var->get_constructor ();
++  if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
++      || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
+     return;
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+@@ -4998,7 +5012,9 @@ analyze_assign_stmt (gimple *stmt)
+     {
+       rhs = TREE_OPERAND (rhs, 0);
+       if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
+-	  || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
++	  || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
++	  || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
++	  || TREE_CODE (rhs) == RESULT_DECL)
+ 	rhs_type = build_pointer_type (TREE_TYPE (rhs));
+       else if (TREE_CODE (rhs) == COMPONENT_REF)
+ 	{
+@@ -5012,7 +5028,12 @@ analyze_assign_stmt (gimple *stmt)
+ 	  gcc_assert (POINTER_TYPE_P (rhs_type));
+ 	}
+       else
+-	gcc_unreachable();
++	{
++	  fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
++		   get_tree_code_name (TREE_CODE (rhs)));
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  gcc_unreachable ();
++	}
+     }
+   else
+     rhs_type = TREE_TYPE (rhs);
+@@ -5710,6 +5731,8 @@ merge_fs_map_for_ftype_aliases ()
+       decl_set *d_set = it1->second;
+       tree type = (*type_uid_map)[it1->first];
+       type_set *set = (*fta_map)[it1->first];
++      if (!set)
++	continue;
+       for (type_set::const_iterator it2 = set->begin ();
+ 	   it2 != set->end (); it2++)
+ 	{
+-- 
+2.33.0
+

_service:tar_scm:0042-Add-split-complex-instructions-pass.patch Added

@@ -0,0 +1,1245 @@
+From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
+From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
+Date: Mon, 21 Aug 2023 12:35:19 +0300
+Subject: [PATCH 09/18] Add split-complex-instructions pass
+
+ - Add option -fsplit-ldp-stp
+ - Add functionality to detect and split LDP instructions that depend on a store.
+ - Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
+ - Add RTL tests
+
+Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
+Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
+Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
+---
+ gcc/common.opt                                |   5 +
+ gcc/config/aarch64/aarch64.cc                 |  42 ++
+ gcc/doc/tm.texi                               |   8 +
+ gcc/doc/tm.texi.in                            |   4 +
+ gcc/params.opt                                |   3 +
+ gcc/passes.def                                |   1 +
+ gcc/sched-rgn.cc                              | 704 +++++++++++++++++-
+ gcc/target.def                                |  10 +
+ .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c  |  74 ++
+ .../rtl/aarch64/test-ldp-split-rearrange.c    |  40 +
+ .../gcc.dg/rtl/aarch64/test-ldp-split.c       | 174 +++++
+ gcc/timevar.def                               |   1 +
+ gcc/tree-pass.h                               |   1 +
+ 13 files changed, 1066 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
+ create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index a42bee250..c0e3f5687 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1797,6 +1797,11 @@ floop-nest-optimize
+ Common Var(flag_loop_nest_optimize) Optimization
+ Enable the loop nest optimizer.
+ 
++fsplit-ldp-stp
++Common Var(flag_split_ldp_stp) Optimization
++Split load/store pair instructions into separate load/store operations
++for better performance.
++
+ fstrict-volatile-bitfields
+ Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
+ Force bitfield accesses to match their type width.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 04072ca25..48e2eded0 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
+ 
+ #endif /* #if CHECKING_P */
+ 
++/* TODO: refuse to use ranges intead of full list of an instruction codes.  */
++
++bool
++is_aarch64_ldp_insn (int icode)
++{
++  if ((icode >= CODE_FOR_load_pair_sw_sisi
++	  && icode <= CODE_FOR_load_pair_dw_tftf)
++      || (icode >= CODE_FOR_loadwb_pairsi_si
++	     && icode <= CODE_FOR_loadwb_pairtf_di)
++      || (icode >= CODE_FOR_load_pairv8qiv8qi
++	     && icode <= CODE_FOR_load_pairdfdf)
++      || (icode >= CODE_FOR_load_pairv16qiv16qi
++	     && icode <= CODE_FOR_load_pairv8bfv2df)
++      || (icode >= CODE_FOR_load_pair_lanesv8qi
++	     && icode <= CODE_FOR_load_pair_lanesdf))
++    return true;
++  return false;
++}
++
++bool
++is_aarch64_stp_insn (int icode)
++{
++  if ((icode >= CODE_FOR_store_pair_sw_sisi
++	  && icode <= CODE_FOR_store_pair_dw_tftf)
++      || (icode >= CODE_FOR_storewb_pairsi_si
++	     && icode <= CODE_FOR_storewb_pairtf_di)
++      || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
++	     && icode <= CODE_FOR_vec_store_pairdfdf)
++      || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
++	     && icode <= CODE_FOR_vec_store_pairv8bfv2df)
++      || (icode >= CODE_FOR_store_pair_lanesv8qi
++	     && icode <= CODE_FOR_store_pair_lanesdf))
++    return true;
++  return false;
++}
++
++#undef TARGET_IS_LDP_INSN
++#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
++
++#undef TARGET_IS_STP_INSN
++#define TARGET_IS_STP_INSN is_aarch64_stp_insn
++
+ #undef TARGET_STACK_PROTECT_GUARD
+ #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
+ 
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index c5006afc0..0c6415a9c 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
+ lists.
+ @end defmac
+ 
++@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
++Return true if icode is corresponding to any of the LDP instruction types.
++@end deftypefn
++
++@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
++Return true if icode is corresponding to any of the STP instruction types.
++@end deftypefn
++
+ @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
+ This target hook returns @code{true} past the point in which new jump
+ instructions could be created.  On machines that require a register for
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index f869ddd5e..6ff60e562 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
+ lists.
+ @end defmac
+ 
++@hook TARGET_IS_LDP_INSN
++
++@hook TARGET_IS_STP_INSN
++
+ @hook TARGET_CANNOT_MODIFY_JUMPS_P
+ 
+ @hook TARGET_HAVE_CONDITIONAL_EXECUTION
+diff --git a/gcc/params.opt b/gcc/params.opt
+index 7fcc2398d..6176d4790 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
+ Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
+ Target size of compressed pointer, which should be 8, 16 or 32.
+ 
++-param=param-ldp-dependency-search-range=
++Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
++Range for depended ldp search in split-ldp-stp path.
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 941bbadf0..a30e05688 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3.  If not see
+ 	  NEXT_PASS (pass_reorder_blocks);
+ 	  NEXT_PASS (pass_leaf_regs);
+ 	  NEXT_PASS (pass_split_before_sched2);
++	  NEXT_PASS (pass_split_complex_instructions);
+ 	  NEXT_PASS (pass_sched2);
+ 	  NEXT_PASS (pass_stack_regs);
+ 	  PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
+diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
+index a0dfdb788..b4df8bdc5 100644
+--- a/gcc/sched-rgn.cc
++++ b/gcc/sched-rgn.cc
+@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3.  If not see
+    are actually scheduled.  */
+ &#xc;
+ #include "config.h"
++#define INCLUDE_SET
++#define INCLUDE_VECTOR
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "dbgcnt.h"
+ #include "pretty-print.h"
+ #include "print-rtl.h"
++#include "cfgrtl.h"
+ 
+ /* Disable warnings about quoting issues in the pp_xxx calls below
+    that (intentionally) don't follow GCC diagnostic conventions.  */
+@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
+   return new pass_sched_fusion (ctxt);
+ }
+ 
++namespace {
++
++/* Def-use analisys special functions implementation.  */
++
++static struct df_link *
++get_defs (rtx_insn *insn, rtx reg)
++{
++  df_ref use;
++  struct df_link *ref_chain, *ref_link;
++
++  FOR_EACH_INSN_USE (use, insn)
++    {
++      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
++	return NULL;
++      if (REGNO (DF_REF_REG (use)) == REGNO (reg))

_service:tar_scm:0043-Extending-and-refactoring-of-pass_split_complex_inst.patch Added

@@ -0,0 +1,1426 @@
+From a49db831320ac70ca8f46b94ee60d7c6951f65c3 Mon Sep 17 00:00:00 2001
+From: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
+Date: Wed, 20 Dec 2023 21:36:07 +0300
+Subject: [PATCH 10/18] Extending and refactoring of
+ pass_split_complex_instructions
+
+- Add flag parameter in is_ldp_insn and is_stp_insn to know
+  if instruction has writeback operation
+- Add support of PRE_*, POST_* operands as a memory address
+  expression
+- Split only LDPs that intersect with a dependent store
+  instruction
+- Make the selection of dependent store instructions stricter
+  so it will be enough to check by BFS that dependent store
+  instruction appears in search range.
+- Add helper methods to retrieve fields of rtx
+- Remove redundant iterations in find_dependent_stores_candidates
+- Refactor generation of instructions
+- Add more test cases
+---
+ gcc/config/aarch64/aarch64.cc                 |  62 +-
+ gcc/doc/tm.texi                               |  12 +-
+ gcc/sched-rgn.cc                              | 771 +++++++++---------
+ gcc/target.def                                |  14 +-
+ .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c  |  35 +-
+ .../rtl/aarch64/test-ldp-split-rearrange.c    |   2 +-
+ .../gcc.dg/rtl/aarch64/test-ldp-split.c       | 181 +++-
+ 7 files changed, 603 insertions(+), 474 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 48e2eded0..fa566dd80 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -27507,39 +27507,59 @@ aarch64_run_selftests (void)
+ 
+ #endif /* #if CHECKING_P */
+ 
+-/* TODO: refuse to use ranges intead of full list of an instruction codes.  */
++/* TODO: refuse to use ranges instead of full list of an instruction codes.  */
+ 
+ bool
+-is_aarch64_ldp_insn (int icode)
++is_aarch64_ldp_insn (int icode, bool *has_wb)
+ {
+   if ((icode >= CODE_FOR_load_pair_sw_sisi
+-	  && icode <= CODE_FOR_load_pair_dw_tftf)
++	  && icode <= CODE_FOR_load_pair_sw_sfsf)
++      || (icode >= CODE_FOR_load_pair_dw_didi
++	  && icode <= CODE_FOR_load_pair_dw_dfdf)
++      || (icode == CODE_FOR_load_pair_dw_tftf)
+       || (icode >= CODE_FOR_loadwb_pairsi_si
+-	     && icode <= CODE_FOR_loadwb_pairtf_di)
+-      || (icode >= CODE_FOR_load_pairv8qiv8qi
+-	     && icode <= CODE_FOR_load_pairdfdf)
+-      || (icode >= CODE_FOR_load_pairv16qiv16qi
+-	     && icode <= CODE_FOR_load_pairv8bfv2df)
+-      || (icode >= CODE_FOR_load_pair_lanesv8qi
+-	     && icode <= CODE_FOR_load_pair_lanesdf))
+-    return true;
++	  && icode <= CODE_FOR_loadwb_pairdi_di)
++      || (icode >= CODE_FOR_loadwb_pairsf_si
++	  && icode <= CODE_FOR_loadwb_pairdf_di)
++      || (icode >= CODE_FOR_loadwb_pairti_si
++	  && icode <= CODE_FOR_loadwb_pairtf_di))
++    {
++      if (has_wb)
++	*has_wb = ((icode >= CODE_FOR_loadwb_pairsi_si
++		     && icode <= CODE_FOR_loadwb_pairdi_di)
++		   || (icode >= CODE_FOR_loadwb_pairsf_si
++		     && icode <= CODE_FOR_loadwb_pairdf_di)
++		   || (icode >= CODE_FOR_loadwb_pairti_si
++		      && icode <= CODE_FOR_loadwb_pairtf_di));
++      return true;
++    }
+   return false;
+ }
+ 
+ bool
+-is_aarch64_stp_insn (int icode)
++is_aarch64_stp_insn (int icode, bool *has_wb)
+ {
+   if ((icode >= CODE_FOR_store_pair_sw_sisi
+-	  && icode <= CODE_FOR_store_pair_dw_tftf)
++	  && icode <= CODE_FOR_store_pair_sw_sfsf)
++      || (icode >= CODE_FOR_store_pair_dw_didi
++	  && icode <= CODE_FOR_store_pair_dw_dfdf)
++      || (icode == CODE_FOR_store_pair_dw_tftf)
+       || (icode >= CODE_FOR_storewb_pairsi_si
+-	     && icode <= CODE_FOR_storewb_pairtf_di)
+-      || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
+-	     && icode <= CODE_FOR_vec_store_pairdfdf)
+-      || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
+-	     && icode <= CODE_FOR_vec_store_pairv8bfv2df)
+-      || (icode >= CODE_FOR_store_pair_lanesv8qi
+-	     && icode <= CODE_FOR_store_pair_lanesdf))
+-    return true;
++	  && icode <= CODE_FOR_storewb_pairdi_di)
++      || (icode >= CODE_FOR_storewb_pairsf_si
++	  && icode <= CODE_FOR_storewb_pairdf_di)
++      || (icode >= CODE_FOR_storewb_pairti_si
++	  && icode <= CODE_FOR_storewb_pairtf_di))
++    {
++      if (has_wb)
++	*has_wb = ((icode >= CODE_FOR_storewb_pairsi_si
++		     && icode <= CODE_FOR_storewb_pairdi_di)
++		   || (icode >= CODE_FOR_storewb_pairsf_si
++		     && icode <= CODE_FOR_storewb_pairdf_di)
++		   || (icode >= CODE_FOR_storewb_pairti_si
++		     && icode <= CODE_FOR_storewb_pairtf_di));
++      return true;
++    }
+   return false;
+ }
+ 
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 0c6415a9c..3b6e90bf2 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -12113,12 +12113,16 @@ object files that are not referenced from @code{main} and uses export
+ lists.
+ @end defmac
+ 
+-@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
+-Return true if icode is corresponding to any of the LDP instruction types.
++@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}, bool *@var{has_wb})
++Return true if @var{icode} is corresponding to any of the LDP instruction
++types.  If @var{has_wb} is not NULL then its value is set to true if LDP
++contains post-index or pre-index operation.
+ @end deftypefn
+ 
+-@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
+-Return true if icode is corresponding to any of the STP instruction types.
++@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}, bool *@var{has_wb})
++Return true if @var{icode} is corresponding to any of the STP instruction
++types.  If @var{has_wb} is not NULL then its value is set to true if STP
++contains post-index or pre-index operation.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
+diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
+index b4df8bdc5..5f61de1c8 100644
+--- a/gcc/sched-rgn.cc
++++ b/gcc/sched-rgn.cc
+@@ -3956,7 +3956,7 @@ make_pass_sched_fusion (gcc::context *ctxt)
+ 
+ namespace {
+ 
+-/* Def-use analisys special functions implementation.  */
++/* Def-use analysis special functions implementation.  */
+ 
+ static struct df_link *
+ get_defs (rtx_insn *insn, rtx reg)
+@@ -4032,42 +4032,66 @@ const pass_data pass_data_split_complex_instructions = {
+   (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish.  */
+ };
+ 
++/* Pass split_complex_instructions finds LOAD PAIR instructions (LDP) that can
++   be split into two LDR instructions.  It splits only those LDP for which one
++   half of the requested memory is contained in the preceding STORE (STR/STP)
++   instruction whose base register has the same definition.  This allows
++   to use hardware store-to-load forwarding mechanism and to get one half of
++   requested memory from the store queue of CPU.
++
++   TODO: Add split of STP.
++   TODO: Add split of vector STP and LDP.  */
+ class pass_split_complex_instructions : public rtl_opt_pass
+ {
+ private:
+-  enum complex_instructions_t
++  enum mem_access_insn_t
+   {
+     UNDEFINED,
+     LDP,
++    /* LDP with post-index (see loadwb_pair in config/aarch64.md).  */
++    LDP_WB,
++    /* LDP that contains one destination register in RTL IR
++       (see movti_aarch64 in config/aarch64.md).  */
+     LDP_TI,
+     STP,
++    /* STP with pre-index (see storewb_pair in config/aarch64.md).  */
++    STP_WB,
++    /* STP that contains one source register in RTL IR
++       (see movti_aarch64 in config/aarch64.md).  */
++    STP_TI,
+     STR
+   };
+ 
+-  void split_complex_insn (rtx_insn *insn);
+-  void split_ldp_ti (rtx_insn *insn);
+-  void split_ldp_with_offset (rtx_insn *ldp_insn);
+-  void split_simple_ldp (rtx_insn *ldp_insn);
+-  void split_ldp_stp (rtx_insn *insn);
+-  complex_instructions_t get_insn_type (rtx_insn *insn);
+-
+-  basic_block bb;
+-  rtx_insn *insn;
+   std::set<rtx_insn *> dependent_stores_candidates;
+   std::set<rtx_insn *> ldp_to_split_list;
+

_service:tar_scm:0044-Port-maxmin-patch-to-GCC-12.patch Added

@@ -0,0 +1,378 @@
+From a3013c074cd2ab5f71eb98a587a627f38c68656c Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Feb 2024 17:07:24 +0800
+Subject: [PATCH 12/18] Port maxmin patch to GCC 12
+
+---
+ gcc/config/aarch64/aarch64-simd.md    | 256 ++++++++++++++++++++++++++
+ gcc/config/aarch64/predicates.md      |  19 ++
+ gcc/testsuite/gcc.dg/combine-maxmin.c |  46 +++++
+ 3 files changed, 321 insertions(+)
+ create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index 82f73805f..de92802f5 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -1138,6 +1138,82 @@
+   (set_attr "type" "neon_compare<q>,neon_shift_imm<q>")
+ )
+ 
++;; Simplify the extension with following truncation for shift+neg operation.
++
++(define_insn_and_split "*aarch64_sshr_neg_v8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (match_operand:V8HI 1 "register_operand")
++		    (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
++	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (match_dup 1)
++		    (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
++	      (match_dup 2)))))]
++  "TARGET_SIMD"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(ashiftrt:V8HI
++	  (neg:V8HI
++	    (match_operand:V8HI 1 "register_operand" "w"))
++	  (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
++  {
++    /* Reduce the shift amount to smaller mode.  */
++    int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
++	      - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
++    operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
++  }
++  [(set_attr "type" "multiple")]
++)
++
++;; The helper definition that allows combiner to use the previous pattern.
++
++(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(match_operand:V4SI 1 "register_operand" "w"))
++	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++	  (truncate:V4HI
++	    (ashiftrt:V4SI
++	      (neg:V4SI
++		(match_operand:V4SI 3 "register_operand" "w"))
++	      (match_dup 2)))))]
++  "TARGET_SIMD"
++  "#"
++  "&& true"
++  [(set (match_operand:V4SI 1 "register_operand" "=w")
++	(ashiftrt:V4SI
++	  (neg:V4SI
++	    (match_dup 1))
++	  (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
++   (set (match_operand:V4SI 3 "register_operand" "=w")
++	(ashiftrt:V4SI
++	  (neg:V4SI
++	    (match_dup 3))
++	  (match_dup 2)))
++   (set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (truncate:V4HI
++	    (match_dup 1))
++	  (truncate:V4HI
++	    (match_dup 3))))]
++  ""
++  [(set_attr "type" "multiple")]
++)
++
+ (define_insn "*aarch64_simd_sra<mode>"
+  (set (match_operand:VDQ_I 0 "register_operand" "=w")
+ 	(plus:VDQ_I
+@@ -1714,6 +1790,26 @@
+  }
+ )
+ 
++(define_insn "vec_pack_trunc_shifted_<mode>"
++ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
++       (vec_concat:<VNARROWQ2>
++	 (truncate:<VNARROWQ>
++	   (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
++	      (match_operand:VQN 2 "half_size_operand" "w")))
++	 (truncate:<VNARROWQ>
++	   (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
++	      (match_operand:VQN 4 "half_size_operand" "w")))))]
++ "TARGET_SIMD"
++ {
++   if (BYTES_BIG_ENDIAN)
++     return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
++   else
++     return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
++ }
++  [(set_attr "type" "neon_permute<q>")
++   (set_attr "length" "4")]
++)
++
+ (define_insn "aarch64_shrn<mode>_insn_le"
+   (set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ 	(vec_concat:<VNARROWQ2>
+@@ -6652,6 +6748,166 @@
+   (set_attr "type" "neon_tst<q>")
+ )
+ 
++;; Simplify the extension with following truncation for cmtst-like operation.
++
++(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
++  [(set (match_operand:V8HI 0 "register_operand" "=w")
++	(vec_concat:V8HI
++	  (plus:V4HI
++	    (truncate:V4HI
++	      (eq:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (and:V8HI
++		      (match_operand:V8HI 1 "register_operand")
++		      (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++		    (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
++		(match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
++	    (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
++	  (plus:V4HI
++	    (truncate:V4HI
++	      (eq:V4SI
++		(sign_extend:V4SI
++		  (vec_select:V4HI
++		    (and:V8HI
++		      (match_dup 1)
++		      (match_dup 2))
++		    (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
++		(match_dup 4)))
++	    (match_dup 5))))]
++  "TARGET_SIMD && !reload_completed"
++  "#"
++  "&& true"
++  [(set (match_operand:V8HI 6 "register_operand" "=w")
++	(match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++   (set (match_operand:V8HI 0 "register_operand" "=w")
++	(plus:V8HI
++	  (eq:V8HI
++	    (and:V8HI
++	      (match_operand:V8HI 1 "register_operand" "w")
++	      (match_dup 6))
++	    (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
++	  (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
++  {
++    if (can_create_pseudo_p ())
++      {
++	int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
++	operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
++	int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
++	operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
++
++	operands[6] = gen_reg_rtx (V8HImode);
++      }
++    else
++      FAIL;
++  }
++  [(set_attr "type" "neon_tst_q")]
++)
++
++;; Three helper definitions that allow combiner to use the previous pattern.
++
++(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
++  (set (match_operand:V4SI 0 "register_operand" "=w")
++	(neg:V4SI
++	  (eq:V4SI
++	    (sign_extend:V4SI
++	      (vec_select:V4HI
++		(and:V8HI
++		  (match_operand:V8HI 1 "register_operand")
++		  (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
++		(match_operand:V8HI 3 "vect_par_cnst_lo_half")))
++	    (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))

_service:tar_scm:0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch Added

@@ -0,0 +1,239 @@
+From 11da40d18e35219961226d40f11b0702b8649044 Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Thu, 22 Feb 2024 17:13:27 +0800
+Subject: [PATCH 13/18] Port moving minmask pattern to gimple to GCC 12
+
+---
+ gcc/common.opt                          |   4 +
+ gcc/match.pd                            | 104 ++++++++++++++++++++++++
+ gcc/testsuite/gcc.dg/combine-maxmin-1.c |  15 ++++
+ gcc/testsuite/gcc.dg/combine-maxmin-2.c |  14 ++++
+ gcc/testsuite/gcc.dg/combine-maxmin.c   |  19 +++--
+ 5 files changed, 151 insertions(+), 5 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 6c6fabb31..3a5004271 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1846,6 +1846,10 @@ fif-conversion-gimple
+ Common Var(flag_if_conversion_gimple) Optimization
+ Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
+ 
++fconvert-minmax
++Common Var(flag_convert_minmax) Optimization
++Convert saturating clipping to min max.
++
+ fstack-reuse=
+ Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
+ -fstack-reuse=all|named_vars|none	Set stack reuse level for local variables.
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 61866cb90..3a19e93b3 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -8031,3 +8031,107 @@ and,
+    (plus:c@4 (op2:c @0 @1)
+     (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
+      (if (single_use (@4) && single_use (@5)))))
++
++/* MinMax pattern matching helpers.  More info on the transformation below.  */
++
++/* Match (a & 0b11..100..0) pattern.  */
++(match (minmax_cmp_arg @0 @1)
++ (bit_and @0 INTEGER_CST@1)
++ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
++
++/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
++   This statement is blocking for the transformation of unsigned integers.
++   Do type check here to avoid unnecessary duplications.  */
++(match (minmax_sat_arg @0)
++ (rshift (negate @0) INTEGER_CST@1)
++ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
++      && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
++
++/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
++   The matched pattern can be described as saturated clipping.
++
++   The pattern supports truncation via both casts and bit_and.
++   Also there are patterns for possible inverted conditions.  */
++(if (flag_convert_minmax)
++/* Truncation via casts.  Unfortunately convert? cannot be applied here
++   because convert and cond take different number of arguments.  */
++ (simplify
++  (convert
++   (cond
++    (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++    (convert? (minmax_sat_arg @0))
++    (convert? @0)))
++  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++ (simplify
++  (cond
++   (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++   (convert? (minmax_sat_arg @0))
++   (convert? @0))
++  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++
++ (simplify
++  (convert
++   (cond
++    (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++    (convert? @0)
++    (convert? (minmax_sat_arg @0))))
++  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++ (simplify
++  (cond
++   (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++   (convert? @0)
++   (convert? (minmax_sat_arg @0)))
++  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++
++ /* Truncation via bit_and with mask.  Same concerns on convert? here.  */
++ (simplify
++  (convert
++   (cond
++    (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++    (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
++    (convert? @0)))
++  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++ (simplify
++  (cond
++   (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++   (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
++   (convert? @0))
++  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++
++ (simplify
++  (convert
++   (cond
++    (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++    (convert? @0)
++    (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
++  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; })))))
++ (simplify
++  (cond
++   (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
++   (convert? @0)
++   (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
++  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
++   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
++    (convert (min (max @0 { integer_zero_node; })
++		  { mask; }))))))
+diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
+new file mode 100644
+index 000000000..859ff7df8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-options "-O3 -fconvert-minmax" } */
++
++#include <inttypes.h>
++
++__attribute__((noinline))
++void test (int32_t *restrict a, int32_t *restrict x)
++{
++  for (int i = 0; i < 4; i++)
++    a[i] = ((((-x[i]) >> 31) ^ x[i])
++            & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
++}
++
++/* { dg-final { scan-assembler-not {smax\t} } }  */
++/* { dg-final { scan-assembler-not {smin\t} } }  */
+diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
+new file mode 100644
+index 000000000..63d4d85b3
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-options "-O3 -fconvert-minmax" } */
++
++#include <inttypes.h>
++
++__attribute__((noinline))
++void test (int8_t *restrict a, int32_t *restrict x)
++{
++  for (int i = 0; i < 8; i++)
++    a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
++}
++
++/* { dg-final { scan-assembler-times {smax\t} 4 } }  */
++/* { dg-final { scan-assembler-times {smin\t} 4 } }  */
+diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
+index 06bce7029..a984fa560 100755
+--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
++++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile { target aarch64-*-* } } */
+-/* { dg-options "-O3 -fdump-rtl-combine-all" } */
++/* { dg-options "-O3 -fconvert-minmax" } */
+ 
+ /* The test checks usage of smax/smin insns for clip evaluation and
+  * uzp1/uzp2 insns for vector element narrowing.  It's inspired by
+@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+ {
+     const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
+     for( int y = 0; y < height; y++ ) {
++        /* This loop is not being vectorized now.  */

_service:tar_scm:0046-Add-new-pattern-to-pass-the-maxmin-tests.patch Added

@@ -0,0 +1,65 @@
+From dbcb2630c426c8dd2117b5ce625da8422dd8cd65 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Feb 2024 17:20:17 +0800
+Subject: [PATCH 14/18] Add new pattern to pass the maxmin tests
+
+---
+ gcc/match.pd                          | 24 ++++++++++++++++++++++++
+ gcc/testsuite/gcc.dg/combine-maxmin.c |  2 +-
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 3a19e93b3..aee58e47b 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -8038,6 +8038,10 @@ and,
+ (match (minmax_cmp_arg @0 @1)
+  (bit_and @0 INTEGER_CST@1)
+  (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
++/* Match ((unsigned) a > 0b0..01..1) pattern.  */
++(match (minmax_cmp_arg1 @0 @1)
++ (gt @0 INTEGER_CST@1)
++ (if (wi::popcount (wi::to_widest (@1) + 1) == 1)))
+ 
+ /* Match (inversed_sign_bit >> sign_bit_pos) pattern.
+    This statement is blocking for the transformation of unsigned integers.
+@@ -8095,6 +8099,26 @@ and,
+     (convert (min (max @0 { integer_zero_node; })
+ 		  { mask; })))))
+ 
++ (simplify
++  (convert
++   (cond
++    (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
++    (convert? (minmax_sat_arg @0))
++    (convert? @0)))
++  (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
++    (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
++		  { mask; })))))
++ (simplify
++  (cond
++   (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
++   (convert? (minmax_sat_arg @0))
++   (convert? @0))
++  (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
++   (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
++    (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
++		  { mask; })))))
++
+  /* Truncation via bit_and with mask.  Same concerns on convert? here.  */
+  (simplify
+   (convert
+diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
+index a984fa560..5c0c9cc49 100755
+--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
++++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
+@@ -52,4 +52,4 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+ /* { dg-final { scan-assembler-times {usmin\t} 6 } }  */
+ /* All of the vectorized patterns are expected to be matched.  */
+ /* { dg-final { scan-assembler-not {cmtst\t} } }  */
+-/* { dg-final { scan-assembler-times {uzp1\t} 5 } }  */
++/* { dg-final { scan-assembler-times {uzp1\t} 2 } }  */
+-- 
+2.33.0
+

_service:tar_scm:0047-AES-Implement-AES-pattern-matching.patch Added

@@ -0,0 +1,3968 @@
+From 53d321d2fe08f69a29527be157d4bcaaefea04ab Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Wed, 6 Dec 2023 10:46:28 +0300
+Subject: [PATCH 15/18] AES Implement AES pattern matching
+
+---
+ gcc/Makefile.in                               |    1 +
+ gcc/common.opt                                |    4 +
+ gcc/config/aarch64/aarch64.cc                 |   24 +
+ gcc/crypto-accel.cc                           | 2415 +++++++++++++++++
+ gcc/doc/tm.texi                               |   29 +
+ gcc/doc/tm.texi.in                            |   12 +
+ gcc/passes.def                                |    1 +
+ gcc/rtl-matcher.h                             |  367 +++
+ gcc/target.def                                |   41 +
+ .../gcc.target/aarch64/aes-decrypt.c          |  478 ++++
+ .../gcc.target/aarch64/aes-encrypt.c          |  443 +++
+ gcc/timevar.def                               |    1 +
+ gcc/tree-pass.h                               |    1 +
+ 13 files changed, 3817 insertions(+)
+ create mode 100644 gcc/crypto-accel.cc
+ create mode 100644 gcc/rtl-matcher.h
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/aes-decrypt.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/aes-encrypt.c
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 45705c1f3..876000bda 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1332,6 +1332,7 @@ OBJS = \
+ 	cgraphunit.o \
+ 	cgraphclones.o \
+ 	combine.o \
++	crypto-accel.o \
+ 	combine-stack-adj.o \
+ 	compare-elim.o \
+ 	context.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 3a5004271..1eb62ada5 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1129,6 +1129,10 @@ Common Var(flag_array_widen_compare) Optimization
+ Extends types for pointers to arrays to improve array comparsion performance.
+ In some extreme situations this may result in unsafe behavior.
+ 
++fcrypto-accel-aes
++Common Var(flag_crypto_accel_aes) Init(0) Optimization
++Perform crypto acceleration AES pattern matching.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index fa566dd80..9171d9d56 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -27569,6 +27569,30 @@ is_aarch64_stp_insn (int icode, bool *has_wb)
+ #undef TARGET_IS_STP_INSN
+ #define TARGET_IS_STP_INSN is_aarch64_stp_insn
+ 
++machine_mode
++aarch64_get_v16qi_mode ()
++{
++  return V16QImode;
++}
++
++#undef TARGET_GET_V16QI_MODE
++#define TARGET_GET_V16QI_MODE aarch64_get_v16qi_mode
++
++#undef TARGET_GEN_REV32V16QI
++#define TARGET_GEN_REV32V16QI gen_aarch64_rev32v16qi
++
++#undef TARGET_GEN_AESEV16QI
++#define TARGET_GEN_AESEV16QI gen_aarch64_crypto_aesev16qi
++
++#undef TARGET_GEN_AESDV16QI
++#define TARGET_GEN_AESDV16QI gen_aarch64_crypto_aesdv16qi
++
++#undef TARGET_GEN_AESMCV16QI
++#define TARGET_GEN_AESMCV16QI gen_aarch64_crypto_aesmcv16qi
++
++#undef TARGET_GEN_AESIMCV16QI
++#define TARGET_GEN_AESIMCV16QI gen_aarch64_crypto_aesimcv16qi
++
+ #undef TARGET_STACK_PROTECT_GUARD
+ #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
+ 
+diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
+new file mode 100644
+index 000000000..f4e810a6b
+--- /dev/null
++++ b/gcc/crypto-accel.cc
+@@ -0,0 +1,2415 @@
++/* Crypto-pattern optimizer.
++   Copyright (C) 2003-2023 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++#include "config.h"
++#define INCLUDE_VECTOR
++#define INCLUDE_MAP
++#define INCLUDE_SET
++#define INCLUDE_ALGORITHM
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "target.h"
++#include "rtl.h"
++#include "tree.h"
++#include "df.h"
++#include "memmodel.h"
++#include "optabs.h"
++#include "regs.h"
++#include "emit-rtl.h"
++#include "recog.h"
++#include "cfgrtl.h"
++#include "cfgcleanup.h"
++#include "expr.h"
++#include "tree-pass.h"
++#include "rtl-matcher.h"
++
++/* Basic AES table description.  */
++struct aes_table
++{
++  /* Number of elements per table.  */
++  static const unsigned int table_nelts = 256;
++  /* Number of tables.  */
++  static const unsigned int basic_tables_num = 4;
++  /* Number of rounds.  */
++  static const unsigned int rounds_num = 4;
++  /* Common ID for wrong table.  */
++  static const unsigned int BAD_TABLE = -1;
++
++  typedef const unsigned int table_type[table_nelts];
++  typedef table_type *table_map[basic_tables_num];
++
++  template<typename T>
++  static bool is_basic_table (tree ctor, const T ethalon[table_nelts])
++    {
++      if (TREE_CODE (ctor) != CONSTRUCTOR
++	  ||CONSTRUCTOR_NELTS (ctor) != table_nelts)
++	return false;
++
++      unsigned ix;
++      tree val;
++      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (ctor), ix, val)
++	if (TREE_INT_CST_LOW (val) != ethalon[ix])
++	  return false;
++      return true;
++    }
++
++  static unsigned check_table (tree ctor,
++			       table_map tables)
++    {
++      for (unsigned i = 0; i < 4; ++i)
++	if (is_basic_table (ctor, *tables[i]))
++	  return i;
++      return BAD_TABLE;
++    }
++};
++
++/* AES encryption info.  */
++struct aes_encrypt_table : aes_table
++{
++  typedef enum
++  {
++    TE0,
++    TE1,
++    TE2,
++    TE3,
++    BAD_TABLE = aes_table::BAD_TABLE
++  } table_entry;
++
++  static table_type Te0;
++  static table_type Te1;
++  static table_type Te2;
++  static table_type Te3;
++
++  static table_map tables;
++  static table_entry roundsrounds_num;
++  static table_entry final_roundsrounds_num;
++
++  static table_entry get_table_id (tree ctor)
++    {
++      return static_cast<table_entry> (check_table (ctor, tables));

_service:tar_scm:0048-crypto-accel-add-optimization-level-requirement-to-t.patch Added

_service:tar_scm:0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch Added

@@ -0,0 +1,239 @@
+From b5865aef36ebaac87ae30d51f08bfe081795ed67 Mon Sep 17 00:00:00 2001
+From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
+Date: Tue, 12 Mar 2024 23:30:56 +0800
+Subject: [PATCH 17/18] Add more flexible check for pointer aliasing during
+ vectorization. It takes the minimum between the number of iterations and the
+ segment length; this helps to speed up loops with a small number of iterations
+ when only the tail can be vectorized
+
+---
+ gcc/params.opt                                |  5 ++
+ .../sve/var_stride_flexible_segment_len_1.c   | 23 +++++++
+ gcc/tree-data-ref.cc                          | 67 +++++++++++++------
+ gcc/tree-data-ref.h                           | 11 ++-
+ gcc/tree-vect-data-refs.cc                    | 14 +++-
+ 5 files changed, 95 insertions(+), 25 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
+
+diff --git a/gcc/params.opt b/gcc/params.opt
+index 6176d4790..7e5c119cf 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -1180,6 +1180,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop.
+ Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization
+ Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check.
+ 
++-param=vect-alias-flexible-segment-len=
++Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization
++Use a minimum length of different segments.  Currently the minimum between
++iteration number and vectorization length is chosen by this param.
++
+ -param=vect-max-version-for-alignment-checks=
+ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
+ Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
+new file mode 100644
+index 000000000..894f075f3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */
++
++#define TYPE int
++#define SIZE 257
++
++void __attribute__ ((weak))
++f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
++{
++  for (int i = 0; i < SIZE; ++i)
++    x[i * n] += y[i * n];
++}
++
++/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
++/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
++/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
++/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
++/* Should use a WAR check that multiplies by (VF-2)*4 rather than
++   an overlap check that multiplies by (257-1)*4.  */
++/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
++/* One range check and a check for n being zero.  */
++/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */
++/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
+diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
+index 397792c35..e6ae9e847 100644
+--- a/gcc/tree-data-ref.cc
++++ b/gcc/tree-data-ref.cc
+@@ -2329,31 +2329,15 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
+    same arguments.  Try to optimize cases in which the second access
+    is a write and in which some overlap is valid.  */
+ 
+-static bool
+-create_waw_or_war_checks (tree *cond_expr,
++static void
++create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a,
+ 			  const dr_with_seg_len_pair_t &alias_pair)
+ {
+   const dr_with_seg_len& dr_a = alias_pair.first;
+   const dr_with_seg_len& dr_b = alias_pair.second;
+ 
+-  /* Check for cases in which:
+-
+-     (a) DR_B is always a write;
+-     (b) the accesses are well-ordered in both the original and new code
+-	 (see the comment above the DR_ALIAS_* flags for details); and
+-     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
+-  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+-    return false;
+-
+-  /* Check for equal (but possibly variable) steps.  */
+   tree step = DR_STEP (dr_a.dr);
+-  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+-    return false;
+-
+-  /* Make sure that we can operate on sizetype without loss of precision.  */
+   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+-  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+-    return false;
+ 
+   /* All addresses involved are known to have a common alignment ALIGN.
+      We can therefore subtract ALIGN from an exclusive endpoint to get
+@@ -2370,9 +2354,6 @@ create_waw_or_war_checks (tree *cond_expr,
+ 			       fold_convert (ssizetype, indicator),
+ 			       ssize_int (0));
+ 
+-  /* Get lengths in sizetype.  */
+-  tree seg_len_a
+-    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
+   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
+ 
+   /* Each access has the following pattern:
+@@ -2479,6 +2460,50 @@ create_waw_or_war_checks (tree *cond_expr,
+   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
+   if (dump_enabled_p ())
+     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
++}
++
++/* This is a wrapper function for create_waw_or_war_checks2.  */
++static bool
++create_waw_or_war_checks (tree *cond_expr,
++			  const dr_with_seg_len_pair_t &alias_pair)
++{
++  const dr_with_seg_len& dr_a = alias_pair.first;
++  const dr_with_seg_len& dr_b = alias_pair.second;
++
++  /* Check for cases in which:
++
++     (a) DR_B is always a write;
++     (b) the accesses are well-ordered in both the original and new code
++     (see the comment above the DR_ALIAS_* flags for details); and
++     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
++  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
++    return false;
++
++  /* Check for equal (but possibly variable) steps.  */
++  tree step = DR_STEP (dr_a.dr);
++  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
++    return false;
++
++  /* Make sure that we can operate on sizetype without loss of precision.  */
++  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
++  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
++    return false;
++
++  /* Get lengths in sizetype.  */
++  tree seg_len_a
++    = fold_convert (sizetype,
++		    rewrite_to_non_trapping_overflow (dr_a.seg_len));
++  create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair);
++  if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2)
++    {
++      tree seg_len2_a
++	= fold_convert (sizetype,
++			rewrite_to_non_trapping_overflow (dr_a.seg_len2));
++      tree cond_expr2;
++      create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair);
++      *cond_expr =  fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
++				 *cond_expr, cond_expr2);
++   }
+   return true;
+ }
+ 
+diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
+index f643a95b2..9bc5f16ee 100644
+--- a/gcc/tree-data-ref.h
++++ b/gcc/tree-data-ref.h
+@@ -213,12 +213,19 @@ class dr_with_seg_len
+ public:
+   dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
+ 		   unsigned int a)
+-    : dr (d), seg_len (len), access_size (size), align (a) {}
+-
++    : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a)
++    {}
++  dr_with_seg_len (data_reference_p d, tree len, tree len2,
++		   unsigned HOST_WIDE_INT size, unsigned int a)
++    : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a)
++    {}
+   data_reference_p dr;
+   /* The offset of the last access that needs to be checked minus
+      the offset of the first.  */
+   tree seg_len;
++  /* The second version of segment length.  Currently this is used to
++     soften checks for a small number of iterations.  */
++  tree seg_len2;
+   /* A value that, when added to abs (SEG_LEN), gives the total number of
+      bytes in the segment.  */
+   poly_uint64 access_size;
+diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
+index 4e615b80b..04e68f621 100644
+--- a/gcc/tree-vect-data-refs.cc
++++ b/gcc/tree-vect-data-refs.cc
+@@ -3646,6 +3646,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
+     {
+       poly_uint64 lower_bound;
+       tree segment_length_a, segment_length_b;
++      tree segment_length2_a, segment_length2_b;
+       unsigned HOST_WIDE_INT access_size_a, access_size_b;
+       unsigned int align_a, align_b;
+

_service:tar_scm:0050-Port-IPA-prefetch-to-GCC-12.patch Added

@@ -0,0 +1,2071 @@
+From 7ee50ce44c652e21ca8ad33dc4e175f02b51b072 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 8 Mar 2024 06:50:39 +0800
+Subject: [PATCH 18/18] Port IPA prefetch to GCC 12
+
+---
+ gcc/Makefile.in     |    1 +
+ gcc/cgraph.cc       |    1 +
+ gcc/cgraph.h        |    2 +
+ gcc/common.opt      |    8 +
+ gcc/ipa-devirt.cc   |   54 +-
+ gcc/ipa-prefetch.cc | 1819 +++++++++++++++++++++++++++++++++++++++++++
+ gcc/ipa-sra.cc      |    8 +
+ gcc/params.opt      |    8 +
+ gcc/passes.def      |    1 +
+ gcc/timevar.def     |    1 +
+ gcc/tree-pass.h     |    1 +
+ 11 files changed, 1902 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/ipa-prefetch.cc
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 876000bda..10544e4a9 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1468,6 +1468,7 @@ OBJS = \
+ 	ipa-modref.o \
+ 	ipa-modref-tree.o \
+ 	ipa-predicate.o \
++	ipa-prefetch.o \
+ 	ipa-profile.o \
+ 	ipa-prop.o \
+ 	ipa-param-manipulation.o \
+diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
+index 3734c85db..7d738b891 100644
+--- a/gcc/cgraph.cc
++++ b/gcc/cgraph.cc
+@@ -998,6 +998,7 @@ cgraph_node::create_indirect_edge (gcall *call_stmt, int ecf_flags,
+   edge->indirect_info = cgraph_allocate_init_indirect_info ();
+   edge->indirect_info->ecf_flags = ecf_flags;
+   edge->indirect_info->vptr_changed = true;
++  edge->indirect_info->targets = NULL;
+ 
+   /* Record polymorphic call info.  */
+   if (!cloning_p
+diff --git a/gcc/cgraph.h b/gcc/cgraph.h
+index d96690326..b84ff2f98 100644
+--- a/gcc/cgraph.h
++++ b/gcc/cgraph.h
+@@ -1659,6 +1659,8 @@ public:
+   int param_index;
+   /* ECF flags determined from the caller.  */
+   int ecf_flags;
++  /* Vector of potential call targets determined by analysis.  */
++  vec<cgraph_node *, va_gc_atomic> *targets;
+ 
+   /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES.  */
+   unsigned num_speculative_call_targets : 16;
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 1eb62ada5..e65a06af9 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1328,6 +1328,10 @@ fdevirtualize
+ Common Var(flag_devirtualize) Optimization
+ Try to convert virtual calls to direct ones.
+ 
++fipa-ic
++Common Var(flag_ipa_ic) Optimization Init(0)
++Perform interprocedural analysis of indirect calls.
++
+ ficp
+ Common Var(flag_icp) Optimization Init(0)
+ Try to promote indirect calls to direct ones.
+@@ -2367,6 +2371,10 @@ fprefetch-loop-arrays
+ Common Var(flag_prefetch_loop_arrays) Init(-1) Optimization
+ Generate prefetch instructions, if available, for arrays in loops.
+ 
++fipa-prefetch
++Common Var(flag_ipa_prefetch) Init(0) Optimization
++Generate prefetch instructions, if available, using IPA info.
++
+ fprofile
+ Common Var(profile_flag)
+ Enable basic program profiling code.
+diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
+index 318535d06..dd3562d56 100644
+--- a/gcc/ipa-devirt.cc
++++ b/gcc/ipa-devirt.cc
+@@ -5758,6 +5758,54 @@ merge_fs_map_for_ftype_aliases ()
+     }
+ }
+ 
++/* Save results of indirect call analysis for the next passes.  */
++
++static void
++save_analysis_results ()
++{
++  if (dump_file)
++    fprintf (dump_file, "\n\nSave results of indirect call analysis.\n");
++
++  struct cgraph_node *n;
++  FOR_EACH_FUNCTION (n)
++    {
++      cgraph_edge *e, *next;
++      for (e = n->indirect_calls; e; e = next)
++	{
++	  next = e->next_callee;
++	  if (e->indirect_info->polymorphic)
++	    continue;
++	  gcall *stmt = e->call_stmt;
++	  gcc_assert (stmt != NULL);
++	  tree call_fn = gimple_call_fn (stmt);
++	  tree call_fn_ty = TREE_TYPE (call_fn);
++	  if (!POINTER_TYPE_P (call_fn_ty))
++	    continue;
++
++	  tree ctype = TYPE_CANONICAL (TREE_TYPE (call_fn_ty));
++	  unsigned ctype_uid = ctype ? TYPE_UID (ctype) : 0;
++	  if (!ctype_uid || unsafe_types->count (ctype_uid)
++	      || !fs_map->count (ctype_uid))
++	    continue;
++	  /* TODO: cleanup noninterposable aliases.  */
++	  decl_set *decls = (*fs_map)[ctype_uid];
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, "For call ");
++	      print_gimple_stmt (dump_file, stmt, 0);
++	    }
++	  vec_alloc (e->indirect_info->targets, decls->size ());
++	  for (decl_set::const_iterator it = decls->begin ();
++	       it != decls->end (); it++)
++	    {
++	      struct cgraph_node *target = cgraph_node::get (*it);
++	      /* TODO: maybe discard some targets.  */
++	      e->indirect_info->targets->quick_push (target);
++	    }
++	}
++    }
++}
++
+ /* Dump function types with set of functions corresponding to it.  */
+ 
+ static void
+@@ -5822,6 +5870,8 @@ collect_function_signatures ()
+ 	}
+     }
+   merge_fs_map_for_ftype_aliases ();
++  if (flag_ipa_ic)
++    save_analysis_results ();
+   if (dump_file)
+     dump_function_signature_sets ();
+ }
+@@ -6217,7 +6267,7 @@ ipa_icp (void)
+      optimize indirect calls.  */
+   collect_function_type_aliases ();
+   collect_function_signatures ();
+-  bool optimized = optimize_indirect_calls ();
++  bool optimized = flag_icp ? optimize_indirect_calls () : false;
+ 
+   remove_type_alias_map (ta_map);
+   remove_type_alias_map (fta_map);
+@@ -6264,7 +6314,7 @@ public:
+   /* opt_pass methods: */
+   virtual bool gate (function *)
+     {
+-      return (optimize && flag_icp && !seen_error ()
++      return (optimize && (flag_icp || flag_ipa_ic) && !seen_error ()
+ 	      && (in_lto_p || flag_whole_program));
+     }
+ 
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+new file mode 100644
+index 000000000..aeea51105
+--- /dev/null
++++ b/gcc/ipa-prefetch.cc
+@@ -0,0 +1,1819 @@
++/* IPA prefetch optimizations.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++   Contributed by Ilia Diachkov.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++/* IPA prefetch is an interprocedural pass that detects cases of indirect
++   memory access potentially in loops and inserts prefetch instructions
++   to optimize cache usage during these indirect memory accesses.  */
++

_service:tar_scm:0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch Added

@@ -0,0 +1,2216 @@
+From 4c262af8e178ac7c81b32be5b159b4d09a5841c9 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 8 Mar 2024 07:07:50 +0800
+Subject: [PATCH 1/2] Port fixes for IPA prefetch to GCC 12
+
+---
+ gcc/ipa-devirt.cc                          |    9 +-
+ gcc/ipa-prefetch.cc                        |  174 +-
+ gcc/ipa-sra.cc                             |    7 +
+ gcc/params.opt                             |    4 +-
+ gcc/testsuite/gcc.dg/completion-1.c        |    1 +
+ gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c | 1843 ++++++++++++++++++++
+ 6 files changed, 1974 insertions(+), 64 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
+
+diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
+index dd3562d56..dd000b401 100644
+--- a/gcc/ipa-devirt.cc
++++ b/gcc/ipa-devirt.cc
+@@ -5029,9 +5029,12 @@ analyze_assign_stmt (gimple *stmt)
+ 	}
+       else
+ 	{
+-	  fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+-		   get_tree_code_name (TREE_CODE (rhs)));
+-	  print_gimple_stmt (dump_file, stmt, 0);
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    {
++	      fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
++		       get_tree_code_name (TREE_CODE (rhs)));
++	      print_gimple_stmt (dump_file, stmt, 0);
++	    }
+ 	  gcc_unreachable ();
+ 	}
+     }
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index aeea51105..9537e4835 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -167,6 +167,7 @@ analyse_cgraph ()
+ 	}
+ 
+       /* TODO: maybe remove loop info here.  */
++      n->get_body ();
+       push_cfun (DECL_STRUCT_FUNCTION (n->decl));
+       calculate_dominance_info (CDI_DOMINATORS);
+       loop_optimizer_init (LOOPS_NORMAL);
+@@ -942,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
+       (*mr_candidate_map)[mr] = mr2;
+       return;
+     }
++  /* Probably we shouldn't leave nulls in the map.  */
++  if ((*mr_candidate_map)[mr] == NULL)
++    return;
+   /* TODO: support analysis with incrementation of different fields.  */
+   if ((*mr_candidate_map)[mr]->offset != mr2->offset)
+     {
+@@ -1090,6 +1094,15 @@ analyse_loops ()
+ 	  memref_t *mr = it->first, *mr2 = it->second;
+ 	  if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr))
+ 	    continue;
++	  /* For now optimize only MRs whose mem is a MEM_REF.
++	     TODO: support other MR types.  */
++	  if (TREE_CODE (mr->mem) != MEM_REF)
++	    {
++	      if (dump_file)
++		fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
++			 mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
++	      continue;
++	    }
+ 	  if (!optimize_mrs_map->count (fn))
+ 	    (*optimize_mrs_map)[fn] = new memref_set;
+ 	  (*optimize_mrs_map)[fn]->insert (mr);
+@@ -1102,7 +1115,7 @@ analyse_loops ()
+ 	       it != (*optimize_mrs_map)[fn]->end (); it++)
+ 	    {
+ 	      memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr];
+-	      fprintf (dump_file, "MRs %d,%d with incremental offset ",
++	      fprintf (dump_file, "MRs %d, %d with incremental offset ",
+ 		       mr->mr_id, mr2->mr_id);
+ 	      print_generic_expr (dump_file, mr2->offset);
+ 	      fprintf (dump_file, "\n");
+@@ -1435,6 +1448,52 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
+   return NULL_TREE;
+ }
+ 
++/* Copy stmt and remap its operands.  */
++
++static gimple *
++gimple_copy_and_remap (gimple *stmt)
++{
++  gimple *copy = gimple_copy (stmt);
++  gcc_checking_assert (!is_gimple_debug (copy));
++
++  /* Remap all the operands in COPY.  */
++  struct walk_stmt_info wi;
++  memset (&wi, 0, sizeof (wi));
++  wi.info = copy;
++  walk_gimple_op (copy, remap_gimple_op_r, &wi);
++  if (dump_file)
++    {
++      fprintf (dump_file, "Stmt copy after remap:\n");
++      print_gimple_stmt (dump_file, copy, 0);
++    }
++  return copy;
++}
++
++/* Copy and remap stmts listed in MR in reverse order to last_idx, skipping
++   processed ones.  Insert new stmts to the sequence.  */
++
++static gimple *
++gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
++				    int last_idx, stmt_set &processed)
++{
++  gimple *last_stmt = NULL;
++  for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
++    {
++      if (processed.count (mr->stmts[i]))
++	continue;
++      processed.insert (mr->stmts[i]);
++      if (dump_file)
++	{
++	  fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
++		   i, mr->mr_id);
++	  print_gimple_stmt (dump_file, mr->stmts[i], 0);
++	}
++      last_stmt = gimple_copy_and_remap (mr->stmts[i]);
++      gimple_seq_add_stmt (&stmts, last_stmt);
++  }
++  return last_stmt;
++}
++
+ static void
+ create_cgraph_edge (cgraph_node *n, gimple *stmt)
+ {
+@@ -1490,6 +1549,13 @@ optimize_function (cgraph_node *n, function *fn)
+ 		 "Skip the case.\n");
+       return 0;
+     }
++  if (!tree_fits_shwi_p (inc_mr->step))
++    {
++      if (dump_file)
++	fprintf (dump_file, "Cannot represent incremental MR's step as "
++		 "integer.  Skip the case.\n");
++      return 0;
++    }
+   if (dump_file && !used_mrs.empty ())
+     print_mrs_ids (used_mrs, "Common list of used mrs:\n");
+ 
+@@ -1539,16 +1605,44 @@ optimize_function (cgraph_node *n, function *fn)
+       return 0;
+     }
+   else if (dump_file)
+-    fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
++    {
++      fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
++      gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
++      fprintf (dump_file, "\n");
++    }
+ 
+-  split_block (dom_bb, (gimple *) NULL);
++  /* Try to find comp_mr's stmt in the dominator bb.  */
++  gimple *last_used = NULL;
++  for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
++       gsi_prev (&si))
++    if (comp_mr->stmts[0] == gsi_stmt (si))
++      {
++	last_used = gsi_stmt (si);
++	if (dump_file)
++	  {
++	    fprintf (dump_file, "Last used stmt in dominator bb:\n");
++	    print_gimple_stmt (dump_file, last_used, 0);
++	  }
++	break;
++      }
++
++  split_block (dom_bb, last_used);
+   gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
+ 
+   /* Create new inc var.  Insert new_var = old_var + step * factor.  */
+   decl_map = new tree_map;
+   gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
+   tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
++  /* If old_var definition dominates the current use, just use it, otherwise
++     evaluate it just before new inc var evaluation.  */
+   gimple_seq stmts = NULL;
++  stmt_set processed_stmts;
++  if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
++    {
++      gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
++							processed_stmts);
++      inc_var = gimple_assign_lhs (tmp);
++    }
+   tree var_type = TREE_TYPE (inc_var);
+   enum tree_code inc_code;
+   if (TREE_CODE (var_type) == POINTER_TYPE)
+@@ -1556,52 +1650,28 @@ optimize_function (cgraph_node *n, function *fn)
+   else
+     inc_code = PLUS_EXPR;

_service:tar_scm:0052-Fix-fails-in-IPA-prefetch-src-openEuler-gcc-I96ID7.patch Added

@@ -0,0 +1,94 @@
+From 0263daa1312d0cdcdf9c770bcf5d982a2d4fc16b Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 29 Mar 2024 17:15:41 +0800
+Subject: [PATCH 2/2] Fix fails in IPA prefetch (src-openEuler/gcc: I96ID7)
+
+---
+ gcc/ipa-prefetch.cc | 28 ++++++++++++++++++++++++++--
+ 1 file changed, 26 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index 9537e4835..1ceb5137f 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -366,6 +366,7 @@ typedef std::map<memref_t *, memref_t *> memref_map;
+ typedef std::map<memref_t *, tree> memref_tree_map;
+ 
+ typedef std::set<gimple *> stmt_set;
++typedef std::set<tree> tree_set;
+ typedef std::map<tree, tree> tree_map;
+ 
+ tree_memref_map *tm_map;
+@@ -1124,8 +1125,21 @@ analyse_loops ()
+     }
+ }
+ 
++/* Compare memrefs by IDs; helper for qsort.  */
++
++static int
++memref_id_cmp (const void *p1, const void *p2)
++{
++  const memref_t *mr1 = *(const memref_t **) p1;
++  const memref_t *mr2 = *(const memref_t **) p2;
++
++  if ((unsigned) mr1->mr_id > (unsigned) mr2->mr_id)
++    return 1;
++  return -1;
++}
++
+ /* Reduce the set filtering out memrefs with the same memory references,
+-   return the result vector of memrefs.  */
++   sort and return the result vector of memrefs.  */
+ 
+ static void
+ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
+@@ -1162,6 +1176,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
+ 	    vec.safe_push (mr1);
+ 	}
+     }
++  vec.qsort (memref_id_cmp);
+   if (dump_file)
+     {
+       fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ());
+@@ -1663,10 +1678,15 @@ optimize_function (cgraph_node *n, function *fn)
+     }
+ 
+   /* Create other new vars.  Insert new stmts.  */
++  vec<memref_t *> used_mr_vec = vNULL;
+   for (memref_set::const_iterator it = used_mrs.begin ();
+        it != used_mrs.end (); it++)
++    used_mr_vec.safe_push (*it);
++  used_mr_vec.qsort (memref_id_cmp);
++
++  for (unsigned int j = 0; j < used_mr_vec.length (); j++)
+     {
+-      memref_t *mr = *it;
++      memref_t *mr = used_mr_vec[j];
+       if (mr == comp_mr)
+ 	continue;
+       gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
+@@ -1702,6 +1722,7 @@ optimize_function (cgraph_node *n, function *fn)
+       local = integer_three_node;
+       break;
+     }
++  tree_set prefetched_addrs;
+   for (unsigned int j = 0; j < vmrs.length (); j++)
+     {
+       memref_t *mr = vmrs[j];
+@@ -1714,10 +1735,13 @@ optimize_function (cgraph_node *n, function *fn)
+       tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
+       if (decl_map->count (addr))
+ 	addr = (*decl_map)[addr];
++      if (prefetched_addrs.count (addr))
++	continue;
+       last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
+ 				     3, addr, write_p, local);
+       pcalls.safe_push (last_stmt);
+       gimple_seq_add_stmt (&stmts, last_stmt);
++      prefetched_addrs.insert (addr);
+       if (dump_file)
+ 	{
+ 	  fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
+-- 
+2.33.0
+

_service:tar_scm:0053-struct-reorg-Add-Semi-Relayout.patch Added

@@ -0,0 +1,1366 @@
+From c2a0dcc565e0f6274f26644bd389337db8f2940c Mon Sep 17 00:00:00 2001
+From: tiancheng-bao <baotiancheng1@huawei.com>
+Date: Sat, 30 Mar 2024 11:04:23 +0800
+Subject: [PATCH] [struct-reorg] Add Semi Relayout
+
+---
+ gcc/common.opt                                |   6 +-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 975 +++++++++++++++++-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   8 +
+ gcc/params.opt                                |   5 +
+ .../gcc.dg/struct/semi_relayout_rewrite.c     |  86 ++
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   4 +
+ 6 files changed, 1040 insertions(+), 44 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 38f1e457d..9484df5ad 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2010,9 +2010,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ 
+ fipa-struct-reorg=
+-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5)
+--fipa-struct-reorg=0,1,2,3,4,5 adding none, struct-reorg, reorder-fields,
+-dfe, safe-pointer-compression, unsafe-pointer-compression optimizations.
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 6)
++-fipa-struct-reorg=0,1,2,3,4,5,6 adding none, struct-reorg, reorder-fields,
++dfe, safe-pointer-compression, unsafe-pointer-compression, semi-relayout optimizations.
+ 
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 3922873f3..6a202b4bd 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -294,7 +294,8 @@ enum struct_layout_opt_level
+   STRUCT_REORDER_FIELDS = 1 << 2,
+   DEAD_FIELD_ELIMINATION = 1 << 3,
+   POINTER_COMPRESSION_SAFE = 1 << 4,
+-  POINTER_COMPRESSION_UNSAFE = 1 << 5
++  POINTER_COMPRESSION_UNSAFE = 1 << 5,
++  SEMI_RELAYOUT = 1 << 6
+ };
+ 
+ /* Defines the target pointer size of compressed pointer, which should be 8,
+@@ -308,6 +309,7 @@ void get_base (tree &base, tree expr);
+ 
+ static unsigned int current_layout_opt_level;
+ hash_map<tree, tree> replace_type_map;
++hash_map<tree, tree> semi_relayout_map;
+ 
+ /* Return true if one of these types is created by struct-reorg.  */
+ 
+@@ -426,7 +428,9 @@ srtype::srtype (tree type)
+     visited (false),
+     pc_candidate (false),
+     has_legal_alloc_num (false),
+-    has_alloc_array (0)
++    has_alloc_array (0),
++    semi_relayout (false),
++    bucket_parts (0)
+ {
+   for (int i = 0; i < max_split; i++)
+     newtype[i] = NULL_TREE;
+@@ -891,6 +895,66 @@ srfield::create_new_reorder_fields (tree newtype[max_split],
+   newfield[0] = field;
+ }
+ 
++/* Given a struct s whose fields have already been reordered by size, we
++   try to combine fields smaller than 8 bytes into 8-byte parts.  Example:
++   struct s {
++     uint64_t a,
++     uint32_t b,
++     uint32_t c,
++     uint32_t d,
++     uint16_t e,
++     uint8_t f
++   }
++
++   We allocate memory for arrays of struct S, before semi-relayout, their
++   layout in memory is shown as below:
++   a,b,c,d,e,f,padding;a,b,c,d,e,f,padding;...
++
++   During semi-relayout, we put a number of structs into the same region,
++   called a bucket.  The number is determined by param relayout-bucket-capacity-level.
++   Using 1024 here as example.  After semi-relayout, the layout in a bucket is
++   shown as below:
++   part1 a;a;a...
++   part2 b,c;b,c;b,c;...
++   part3 d,e,f,pad;d,e,f,pad;d,e,f,pad;...
++
++   In the last bucket, if the number of remaining structs is less than the
++   capacity of a bucket, the rest of the allocated memory is wasted as padding.  */
++
++unsigned
++srtype::calculate_bucket_size ()
++{
++  unsigned parts = 0;
++  unsigned bit_sum = 0;
++  unsigned relayout_offset = 0;
++  /* Currently, limit each 8-byte part to at most 2 fields.  */
++  unsigned curr_part_num = 0;
++  unsigned field_num = 0;
++  for (tree f = TYPE_FIELDS (newtype[0]); f; f = DECL_CHAIN (f))
++    {
++      unsigned size = TYPE_PRECISION (TREE_TYPE (f));
++      bit_sum += size;
++      field_num++;
++      if (++curr_part_num > 2 || bit_sum > 64)
++	{
++	  bit_sum = size;
++	  parts++;
++	  relayout_offset = relayout_part_size * parts;
++	  curr_part_num = 1;
++	}
++      else
++	{
++	  relayout_offset = relayout_part_size * parts + (bit_sum - size) / 8;
++	}
++      new_field_offsets.put (f, relayout_offset);
++    }
++  /* Do not relayout a struct with only one field after DFE.  */
++  if (field_num == 1)
++    return 0;
++  bucket_parts = ++parts;
++  return parts * relayout_part_size;
++}
++
+ /* Create the new TYPE corresponding to THIS type.  */
+ 
+ bool
+@@ -1001,6 +1065,15 @@ srtype::create_new_type (void)
+   if (pc_candidate && pc_gptr == NULL_TREE)
+     create_global_ptr_for_pc ();
+ 
++  if (semi_relayout)
++    {
++      bucket_size = calculate_bucket_size ();
++      if (bucket_size == 0)
++	return false;
++      if (semi_relayout_map.get (this->newtype[0]) == NULL)
++	semi_relayout_map.put (this->newtype[0], this->type);
++    }
++
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "Created %d types:\n", maxclusters);
+@@ -1393,7 +1466,7 @@ public:
+ 		       bool should_create = false, bool can_escape = false);
+   bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t);
+ 
+-  void check_alloc_num (gimple *stmt, srtype *type);
++  void check_alloc_num (gimple *stmt, srtype *type, bool ptrptr);
+   void check_definition_assign (srdecl *decl, vec<srdecl *> &worklist);
+   void check_definition_call (srdecl *decl, vec<srdecl *> &worklist);
+   void check_definition (srdecl *decl, vec<srdecl *> &);
+@@ -1442,6 +1515,33 @@ public:
+ 						  tree &);
+   basic_block create_bb_for_compress_nullptr (basic_block, tree &);
+   basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &);
++
++   // Semi-relayout methods:
++  bool is_semi_relayout_candidate (tree);
++  srtype *get_semi_relayout_candidate_type (tree);
++  void check_and_prune_struct_for_semi_relayout (void);
++  tree rewrite_pointer_diff (gimple_stmt_iterator *, tree, tree, srtype *);
++  tree rewrite_pointer_plus_integer (gimple *, gimple_stmt_iterator *, tree,
++				     tree, srtype *);
++  tree build_div_expr (gimple_stmt_iterator *, tree, tree);
++  tree get_true_pointer_base (gimple_stmt_iterator *, tree, srtype *);
++  tree get_real_allocated_ptr (tree, gimple_stmt_iterator *);
++  tree set_ptr_for_use (tree, gimple_stmt_iterator *);
++  void record_allocated_size (tree, gimple_stmt_iterator *, tree);
++  tree read_allocated_size (tree, gimple_stmt_iterator *);
++  gimple *create_aligned_alloc (gimple_stmt_iterator *, srtype *, tree,
++				tree &);
++  void create_memset_zero (tree, gimple_stmt_iterator *, tree);
++  void create_memcpy (tree, tree, tree, gimple_stmt_iterator *);
++  void create_free (tree, gimple_stmt_iterator *);
++  void copy_to_lhs (tree, tree, gimple_stmt_iterator *);
++  srtype *get_relayout_candidate_type (tree);
++  long unsigned int get_true_field_offset (srfield *, srtype *);
++  tree rewrite_address (tree, srfield *, srtype *, gimple_stmt_iterator *);
++  bool check_sr_copy (gimple *);
++  void relayout_field_copy (gimple_stmt_iterator *, gimple *, tree, tree,
++			    tree&, tree &);
++  bool do_semi_relayout (gimple_stmt_iterator *, gimple *, tree &, tree &);
+ };
+ 
+ struct ipa_struct_relayout
+@@ -4355,7 +4455,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
+ }
+ 
+ void
+-ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type)
++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type, bool ptrptr)
+ {
+   if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT

_service:tar_scm:0054-Struct-Reorg-Bugfix-for-structure-pointer-compressio.patch Added

_service:tar_scm:0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch Added

@@ -0,0 +1,420 @@
+From 55c547748af36ffc3f2d5ed154a91fb3fcb8431c Mon Sep 17 00:00:00 2001
+From: Mingchuan Wu <wumingchuan1992@foxmail.com>
+Date: Thu, 11 Apr 2024 15:49:59 +0800
+Subject: [PATCH] [Struct Reorg] Port bugfixes to GCC 12.3.1
+
+Migrated from commits in GCC10.3.1:
+https://gitee.com/openeuler/gcc/commit/41af6d361a6d85ef4fce8a8438113d765596afdd
+https://gitee.com/openeuler/gcc/commit/25d74b98caeaae881e374924886ee664aa1af5bc
+https://gitee.com/openeuler/gcc/commit/b5a3bfe92f96cd0d2224d80ac4eaa80dab1bd6bf
+https://gitee.com/openeuler/gcc/commit/708ffe6f132ee39441b66b6ab6b98847d35916b7
+https://gitee.com/openeuler/gcc/commit/e875e4e7f3716aa268ffbbf55ee199ec82b6aeba
+---
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 97 ++++++++++---------
+ gcc/testsuite/gcc.dg/struct/dfe_escape.c      | 50 ++++++++++
+ gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c    | 69 +++++++++++++
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |  2 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 ++++++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 +++
+ gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 +++++
+ 7 files changed, 243 insertions(+), 46 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
+
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+index 6a202b4bd..f03d1d875 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -466,10 +466,19 @@ srtype::has_dead_field (void)
+   unsigned i;
+   FOR_EACH_VEC_ELT (fields, i, this_field)
+     {
+-      if (!(this_field->field_access & READ_FIELD))
+-	{
+-	  may_dfe = true;
+-	  break;
++      /* Function pointer members are not processed, because DFE
++         does not currently support accurate analysis of function
++         pointers, and we have not identified specific use cases. */
++      if (!(this_field->field_access & READ_FIELD)
++	 && !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
++	{
++	  /* Fields with escape risks should not be processed. */
++	  if (this_field->type == NULL
++	      || (this_field->type->escapes == does_not_escape))
++	    {
++	      may_dfe = true;
++	      break;
++	    }
+ 	}
+     }
+   return may_dfe;
+@@ -1032,8 +1041,13 @@ srtype::create_new_type (void)
+     {
+       srfield *f = fields[i];
+       if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
+-	  && !(f->field_access & READ_FIELD))
+-	continue;
++	  && !(f->field_access & READ_FIELD)
++	  && !FUNCTION_POINTER_TYPE_P (f->fieldtype))
++	{
++	  /* Fields with escape risks should not be processed. */
++	  if (f->type == NULL || (f->type->escapes == does_not_escape))
++	    continue;
++	}
+       f->create_new_fields (newtype, newfields, newlast);
+     }
+ 
+@@ -3815,9 +3829,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other,
+       if (VOID_POINTER_P (TREE_TYPE (side))
+ 	  && TREE_CODE (side) == SSA_NAME)
+ 	{
+-	  /* The type is other, the declaration is side.  */
+-	  current_function->record_decl (type, side, -1,
+-		isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
++	  tree inner = SSA_NAME_VAR (side);
++	  if (inner)
++	    {
++	      srdecl *in = find_decl (inner);
++	      if (in && !in->type->has_escaped ())
++		{
++		  /* The type is other, the declaration is side.  */
++		  current_function->record_decl (type, side, -1,
++			isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
++		}
++	     }
+ 	}
+       else
+ 	/* *_1 = &MEM(void *)&x + 8B.  */
+@@ -3910,6 +3932,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
+ 	maybe_mark_or_record_other_side (rhs, lhs, stmt);
+       if (TREE_CODE (lhs) == SSA_NAME)
+ 	maybe_mark_or_record_other_side (lhs, rhs, stmt);
++
++      /* Handle missing ARRAY_REF cases.  */
++      if (TREE_CODE (lhs) == ARRAY_REF)
++	mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
++      if (TREE_CODE (rhs) == ARRAY_REF)
++	mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
+     }
+ }
+ 
+@@ -5272,8 +5300,11 @@ ipa_struct_reorg::record_accesses (void)
+ 	record_function (cnode);
+       else
+ 	{
+-	  tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
+-	  mark_type_as_escape (return_type, escape_return, NULL);
++	  if (cnode->externally_visible)
++	    {
++	      tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
++	      mark_type_as_escape (return_type, escape_return, NULL);
++	    }
+ 	}
+ 
+     }
+@@ -5889,6 +5920,7 @@ ipa_struct_reorg::rewrite_expr (tree expr,
+   bool escape_from_base = false;
+ 
+   tree newbase[max_split];
++  memset (newbase, 0, sizeof (tree[max_split]));
+   memset (newexpr, 0, sizeof (tree[max_split]));
+ 
+   if (TREE_CODE (expr) == CONSTRUCTOR)
+@@ -6912,7 +6944,7 @@ create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr,
+ }
+ 
+ tree
+-ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt,
++ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt ATTRIBUTE_UNUSED,
+ 						gimple_stmt_iterator *gsi,
+ 						tree ptr, tree offset,
+ 						srtype *type)
+@@ -7889,41 +7921,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
+    should be removed.  */
+ 
+ bool
+-ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
++ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
+ {
+-  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
+-    /* Delete debug gimple now.  */
+-    return true;
+-  bool remove = false;
+-  if (gimple_debug_bind_p (stmt))
+-    {
+-      tree var = gimple_debug_bind_get_var (stmt);
+-      tree newvar[max_split];
+-      if (rewrite_expr (var, newvar, true))
+-	remove = true;
+-      if (gimple_debug_bind_has_value_p (stmt))
+-	{
+-	  var = gimple_debug_bind_get_value (stmt);
+-	  if (TREE_CODE (var) == POINTER_PLUS_EXPR)
+-	    var = TREE_OPERAND (var, 0);
+-	  if (rewrite_expr (var, newvar, true))
+-	    remove = true;
+-	}
+-    }
+-  else if (gimple_debug_source_bind_p (stmt))
+-    {
+-      tree var = gimple_debug_source_bind_get_var (stmt);
+-      tree newvar[max_split];
+-      if (rewrite_expr (var, newvar, true))
+-	remove = true;
+-      var = gimple_debug_source_bind_get_value (stmt);
+-      if (TREE_CODE (var) == POINTER_PLUS_EXPR)
+-	var = TREE_OPERAND (var, 0);
+-      if (rewrite_expr (var, newvar, true))
+-	remove = true;
+-    }
+-
+-  return remove;
++  /* Debug statements may still refer to statements that have been
++     optimized out of the regular gimple.  Such statements sometimes
++     need to be analyzed for escapes, but that should not happen in
++     the rewrite stage.  Handling these cases correctly takes a lot of
++     care for little benefit, so for now we just delete debug gimple.  */
++  return true;
+ }
+ 
+ /* Rewrite PHI nodes, return true if the PHI was replaced.  */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
+new file mode 100644
+index 000000000..09efe8027
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
+@@ -0,0 +1,50 @@
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network