开源软件构建与测试

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

Difference Between Revision 1 and Mega:24.03 / gcc

_service:tar_scm:gcc.spec Changed

@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 25
+%global gcc_release 19
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -23,7 +23,7 @@
 %else
 %global build_libquadmath 0
 %endif
-%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64
+%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64
 %global build_libasan 1
 %else
 %global build_libasan 0
@@ -38,7 +38,7 @@
 %else
 %global build_liblsan 0
 %endif
-%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64 riscv64
+%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 loongarch64
 %global build_libubsan 1
 %else
 %global build_libubsan 0
@@ -166,65 +166,6 @@
 Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
 Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
 Patch28: 0028-Array-widen-compare-Fix-the-return-value-match-after.patch
-Patch29: 0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
-Patch30: 0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
-Patch31: 0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
-Patch32: 0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
-Patch33: 0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
-Patch34: 0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
-Patch35: 0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch
-Patch36: 0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch           
-Patch37: 0037-Perform-early-if-conversion-of-simple-arithmetic.patch      
-Patch38: 0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch  
-Patch39: 0039-Match-double-sized-mul-pattern.patch                        
-Patch40: 0040-Port-icp-patch-to-GCC-12.patch                              
-Patch41: 0041-Port-fixes-in-icp-to-GCC-12.patch
-Patch42: 0042-Add-split-complex-instructions-pass.patch                   
-Patch43: 0043-Extending-and-refactoring-of-pass_split_complex_inst.patch
-Patch44: 0044-Port-maxmin-patch-to-GCC-12.patch
-Patch45: 0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch
-Patch46: 0046-Add-new-pattern-to-pass-the-maxmin-tests.patch
-Patch47: 0047-AES-Implement-AES-pattern-matching.patch
-Patch48: 0048-crypto-accel-add-optimization-level-requirement-to-t.patch
-Patch49: 0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch
-Patch50: 0050-Port-IPA-prefetch-to-GCC-12.patch
-Patch51: 0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
-Patch52: 0052-Fix-fails-in-IPA-prefetch-src-openEuler-gcc-I96ID7.patch
-Patch53: 0053-struct-reorg-Add-Semi-Relayout.patch
-Patch54: 0054-Struct-Reorg-Bugfix-for-structure-pointer-compressio.patch
-Patch55: 0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch
-Patch56: 0056-Fix-bug-that-verifying-gimple-failed-when-reorg-leve.patch
-Patch57: 0057-AutoFdo-Fix-memory-leaks-in-autofdo.patch
-Patch58: 0058-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
-Patch59: 0059-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
-Patch60: 0060-Make-option-mvzeroupper-independent-of-optimization-.patch
-Patch61: 0061-i386-Sync-tune_string-with-arch_string-for-target-at.patch
-Patch62: 0062-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
-Patch63: 0063-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
-Patch64: 0064-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
-Patch65: 0065-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
-Patch66: 0066-Software-mitigation-Disable-gather-generation-in-vec.patch
-Patch67: 0067-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
-Patch68: 0068-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
-Patch69: 0069-Disparage-slightly-for-the-alternative-which-move-DF.patch
-Patch70: 0070-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
-Patch71: 0071-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
-Patch72: 0072-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
-Patch73: 0073-Initial-Raptorlake-Support.patch
-Patch74: 0074-Initial-Meteorlake-Support.patch
-Patch75: 0075-Support-Intel-AMX-FP16-ISA.patch
-Patch76: 0076-Support-Intel-prefetchit0-t1.patch
-Patch77: 0077-Initial-Granite-Rapids-Support.patch
-Patch78: 0078-Support-Intel-AMX-COMPLEX.patch
-Patch79: 0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
-Patch80: 0080-Initial-Granite-Rapids-D-Support.patch
-Patch81: 0081-Correct-Granite-Rapids-D-documentation.patch
-Patch82: 0082-i386-Remove-Meteorlake-s-family_model.patch
-Patch83: 0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
-Patch84: 0084-x86-Update-model-values-for-Raptorlake.patch
-Patch85: 0085-Fix-target_clone-arch-graniterapids-d.patch
-Patch86: 0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
-Patch87: 0087-Add-cost-calculation-for-reg-equivalence-invariants.patch
 
 # Part 3000 ~ 4999
 %ifarch loongarch64
@@ -353,10 +294,6 @@
 Patch3124: libsanitizer-add-LoongArch-support.patch
 Patch3125: LoongArch-fix-error-building.patch
 Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch
-Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch
-Patch3128: LoongArch-Add-LA664-support.patch
-Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
-Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
 %endif
 
 # On ARM EABI systems, we do want -gnueabi to be part of the
@@ -852,65 +789,6 @@
 %patch26 -p1
 %patch27 -p1
 %patch28 -p1
-%patch29 -p1
-%patch30 -p1
-%patch31 -p1
-%patch32 -p1
-%patch33 -p1
-%patch34 -p1
-%patch35 -p1
-%patch36 -p1
-%patch37 -p1
-%patch38 -p1
-%patch39 -p1
-%patch40 -p1
-%patch41 -p1
-%patch42 -p1
-%patch43 -p1
-%patch44 -p1
-%patch45 -p1
-%patch46 -p1
-%patch47 -p1
-%patch48 -p1
-%patch49 -p1
-%patch50 -p1
-%patch51 -p1
-%patch52 -p1
-%patch53 -p1
-%patch54 -p1
-%patch55 -p1
-%patch56 -p1
-%patch57 -p1
-%patch58 -p1
-%patch59 -p1
-%patch60 -p1
-%patch61 -p1
-%patch62 -p1
-%patch63 -p1
-%patch64 -p1
-%patch65 -p1
-%patch66 -p1
-%patch67 -p1
-%patch68 -p1
-%patch69 -p1
-%patch70 -p1
-%patch71 -p1
-%patch72 -p1
-%patch73 -p1
-%patch74 -p1
-%patch75 -p1
-%patch76 -p1
-%patch77 -p1
-%patch78 -p1
-%patch79 -p1
-%patch80 -p1
-%patch81 -p1
-%patch82 -p1
-%patch83 -p1
-%patch84 -p1
-%patch85 -p1
-%patch86 -p1
-%patch87 -p1
 
 %ifarch loongarch64
 %patch3001 -p1
@@ -1038,10 +916,6 @@
 %patch3124 -p1
 %patch3125 -p1
 %patch3126 -p1
-%patch3127 -p1
-%patch3128 -p1
-%patch3129 -p1
-%patch3130 -p1
 %endif
 
 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
@@ -2488,8 +2362,6 @@
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h
-%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h
-%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h
 %endif
 %ifarch sparc sparcv9 sparc64
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/visintrin.h
@@ -3302,29 +3174,6 @@
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
-* Wed Apr 24 2024 Wang Ding <wangding16@huawei.com> - 12.3.1-25
-- Type: Sync
-- DESC: Sync patch from openeuler/gcc
-
-* Tue Apr 23 2024 laokz <zhangkai@iscas.ac.cn> - 12.3.1-24
-- Type: SPEC

_service:tar_scm:0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch Deleted

@@ -1,1191 +0,0 @@
-From 7930d75c9fd3f36cc2dce934569f00c71248bb31 Mon Sep 17 00:00:00 2001
-From: liyancheng <412998149@qq.com>
-Date: Sat, 25 Nov 2023 10:28:48 +0800
-Subject: [PATCH] [Struct Reorg] Add Safe Structure Pointer Compression
-
-Safe structure pointer compression allows safely transfer pointers
-stored in structure into the index of structure array with smaller
-type to reduce the size of structure.
-Add flag -fipa-struct-reorg=4 to enable safe structure pointer
-compression.
-Add param compressed-pointer-size=8,16,32 to control the compressed
-pointer size.
----
- gcc/common.opt                           |   5 +-
- gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 908 ++++++++++++++++++++++-
- gcc/ipa-struct-reorg/ipa-struct-reorg.h  |   4 +
- gcc/params.opt                           |   4 +
- 4 files changed, 882 insertions(+), 39 deletions(-)
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index b01df919e..f6e20c1e8 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1993,8 +1993,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
- Perform structure layout optimizations.
- 
- fipa-struct-reorg=
--Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3)
---fipa-struct-reorg=0,1,2,3 adding none, struct-reorg, reorder-fields, dfe optimizations.
-+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4)
-+-fipa-struct-reorg=0,1,2,3,4 adding none, struct-reorg, reorder-fields,
-+dfe, safe-pointer-compression optimizations.
- 
- fipa-vrp
- Common Var(flag_ipa_vrp) Optimization
-diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-index dcc6df496..5d451c4c8 100644
---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-@@ -89,6 +89,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "gimple-iterator.h"
- #include "gimple-walk.h"
- #include "cfg.h"
-+#include "cfghooks.h" /* For split_block.  */
- #include "ssa.h"
- #include "tree-dfa.h"
- #include "fold-const.h"
-@@ -147,7 +148,27 @@ using namespace struct_relayout;
- #define VOID_POINTER_P(type) \
-   (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
- 
--/* Return true iff TYPE is stdarg va_list type.  */
-+static void
-+set_var_attributes (tree var)
-+{
-+  if (!var)
-+    return;
-+  gcc_assert (TREE_CODE (var) == VAR_DECL);
-+
-+  DECL_ARTIFICIAL (var) = 1;
-+  DECL_EXTERNAL (var) = 0;
-+  TREE_STATIC (var) = 1;
-+  TREE_PUBLIC (var) = 0;
-+  TREE_USED (var) = 1;
-+  DECL_CONTEXT (var) = NULL;
-+  TREE_THIS_VOLATILE (var) = 0;
-+  TREE_ADDRESSABLE (var) = 0;
-+  TREE_READONLY (var) = 0;
-+  if (is_global_var (var))
-+    set_decl_tls_model (var, TLS_MODEL_NONE);
-+}
-+
-+/* Return true if TYPE is stdarg va_list type.  */
- 
- static inline bool
- is_va_list_type (tree type)
-@@ -271,9 +292,15 @@ enum struct_layout_opt_level
-   STRUCT_SPLIT = 1 << 0,
-   COMPLETE_STRUCT_RELAYOUT = 1 << 1,
-   STRUCT_REORDER_FIELDS = 1 << 2,
--  DEAD_FIELD_ELIMINATION = 1 << 3
-+  DEAD_FIELD_ELIMINATION = 1 << 3,
-+  POINTER_COMPRESSION_SAFE = 1 << 4
- };
- 
-+/* Defines the target pointer size of compressed pointer, which should be 8,
-+   16, 32.  */
-+
-+static int compressed_size = 32;
-+
- static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
- static bool isptrptr (tree type);
- void get_base (tree &base, tree expr);
-@@ -394,7 +421,10 @@ srtype::srtype (tree type)
-   : type (type),
-     chain_type (false),
-     escapes (does_not_escape),
-+    pc_gptr (NULL_TREE),
-     visited (false),
-+    pc_candidate (false),
-+    has_legal_alloc_num (false),
-     has_alloc_array (0)
- {
-   for (int i = 0; i < max_split; i++)
-@@ -476,6 +506,31 @@ srtype::mark_escape (escape_type e, gimple *stmt)
-     }
- }
- 
-+/* Create a global header for compressed struct.  */
-+
-+void
-+srtype::create_global_ptr_for_pc ()
-+{
-+  if (!pc_candidate || pc_gptr != NULL_TREE)
-+    return;
-+
-+  const char *type_name = get_type_name (type);
-+  gcc_assert (type_name != NULL);
-+
-+  char *gptr_name = concat (type_name, "_pc", NULL);
-+  tree new_name = get_identifier (gptr_name);
-+  tree new_type = build_pointer_type (newtype[0]);
-+  tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type);
-+  set_var_attributes (new_var);
-+  pc_gptr = new_var;
-+
-+  if (dump_file && (dump_flags & TDF_DETAILS))
-+    fprintf (dump_file, "\nType: %s has create global header for pointer"
-+	       " compression: %s\n", type_name, gptr_name);
-+
-+  free (gptr_name);
-+}
-+
- /* Add FIELD to the list of fields that use this type.  */
- 
- void
-@@ -798,15 +853,31 @@ srfield::create_new_reorder_fields (tree newtype[max_split],
-       fields.safe_push (field);
-     }
- 
--  DECL_NAME (field) = DECL_NAME (fielddecl);
-   if (type == NULL)
--    /* Common members do not need to reconstruct.
-+    {
-+      DECL_NAME (field) = DECL_NAME (fielddecl);
-+      /* Common members do not need to reconstruct.
-        Otherwise, int* -> int** or void* -> void**.  */
--    TREE_TYPE (field) = nt;
-+      TREE_TYPE (field) = nt;
-+      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
-+    }
-+  else if (type->pc_candidate)
-+    {
-+      const char *old_name = IDENTIFIER_POINTER (DECL_NAME (fielddecl));
-+      char *new_name = concat (old_name, "_pc", NULL);
-+      DECL_NAME (field) = get_identifier (new_name);
-+      free (new_name);
-+      TREE_TYPE (field) = make_unsigned_type (compressed_size);
-+      SET_DECL_ALIGN (field, compressed_size);
-+    }
-   else
--    TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt);
-+    {
-+      TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt);
-+      DECL_NAME (field) = DECL_NAME (fielddecl);
-+      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
-+    }
-+
-   DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl);
--  SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
-   DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl);
-   TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl);
-   DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl);
-@@ -925,6 +996,10 @@ srtype::create_new_type (void)
- 	  && has_dead_field ())
- 	fprintf (dump_file, "Dead field elimination.\n");
-     }
-+
-+  if (pc_candidate && pc_gptr == NULL_TREE)
-+    create_global_ptr_for_pc ();
-+
-   if (dump_file && (dump_flags & TDF_DETAILS))
-     {
-       fprintf (dump_file, "Created %d types:\n", maxclusters);
-@@ -1338,6 +1413,30 @@ public:
- 
-   unsigned execute_struct_relayout (void);
-   bool remove_dead_field_stmt (tree lhs);
-+
-+  // Pointer compression methods:
-+  void check_and_prune_struct_for_pointer_compression (void);
-+  void try_rewrite_with_pointer_compression (gassign *, gimple_stmt_iterator *,
-+					     tree, tree, tree &, tree &);
-+  bool safe_void_cmp_p (tree, srtype *);
-+  bool pc_candidate_st_type_p (tree);
-+  bool pc_candidate_tree_p (tree);
-+  bool pc_type_conversion_candidate_p (tree);
-+  bool pc_direct_rewrite_chance_p (tree, tree &);
-+  bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);

_service:tar_scm:0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch Deleted

@@ -1,1232 +0,0 @@
-From 82d6166cd29fb1c3474f29b28cb7e5478d3a551a Mon Sep 17 00:00:00 2001
-From: liyancheng <412998149@qq.com>
-Date: Mon, 25 Dec 2023 11:17:04 +0800
-Subject: [PATCH] [Struct Reorg] Add unsafe structure pointer compression
-
-Unsafe structure pointer compression allows for some dangerous
-conversions for better performance.
-Add flag -fipa-struct-reorg=5 to enable unsafe structure pointer
-compression.
----
- gcc/common.opt                                |   6 +-
- gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 365 ++++++++++++++----
- gcc/symbol-summary.h                          |  22 +-
- .../gcc.dg/struct/csr_skip_void_struct_name.c |  53 +++
- gcc/testsuite/gcc.dg/struct/pc_cast_int.c     |  91 +++++
- .../gcc.dg/struct/pc_compress_and_decomress.c |  90 +++++
- gcc/testsuite/gcc.dg/struct/pc_ptr2void.c     |  87 +++++
- .../gcc.dg/struct/pc_simple_rewrite_pc.c      | 112 ++++++
- .../gcc.dg/struct/pc_skip_void_struct_name.c  |  53 +++
- gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   8 +
- 10 files changed, 804 insertions(+), 83 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/pc_cast_int.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/pc_ptr2void.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 56b547506..c7c6bc256 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1993,9 +1993,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
- Perform structure layout optimizations.
- 
- fipa-struct-reorg=
--Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4)
---fipa-struct-reorg=0,1,2,3,4 adding none, struct-reorg, reorder-fields,
--dfe, safe-pointer-compression optimizations.
-+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5)
-+-fipa-struct-reorg=0,1,2,3,4,5 adding none, struct-reorg, reorder-fields,
-+dfe, safe-pointer-compression, unsafe-pointer-compression optimizations.
- 
- fipa-vrp
- Common Var(flag_ipa_vrp) Optimization
-diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-index 5d451c4c8..fa33f2d35 100644
---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-@@ -293,7 +293,8 @@ enum struct_layout_opt_level
-   COMPLETE_STRUCT_RELAYOUT = 1 << 1,
-   STRUCT_REORDER_FIELDS = 1 << 2,
-   DEAD_FIELD_ELIMINATION = 1 << 3,
--  POINTER_COMPRESSION_SAFE = 1 << 4
-+  POINTER_COMPRESSION_SAFE = 1 << 4,
-+  POINTER_COMPRESSION_UNSAFE = 1 << 5
- };
- 
- /* Defines the target pointer size of compressed pointer, which should be 8,
-@@ -1267,10 +1268,10 @@ csrtype::init_type_info (void)
- 
-   /* Close enough to pad to improve performance.
-      33~63 should pad to 64 but 33~48 (first half) are too far away, and
--     65~127 should pad to 128 but 65~96 (first half) are too far away.  */
-+     70~127 should pad to 128 but 65~70 (first half) are too far away.  */
-   if (old_size > 48 && old_size < 64)
-     new_size = 64;
--  if (old_size > 96 && old_size < 128)
-+  if (old_size > 70 && old_size < 128)
-     new_size = 128;
- 
-   /* For performance reasons, only allow structure size
-@@ -1423,8 +1424,12 @@ public:
-   bool pc_candidate_tree_p (tree);
-   bool pc_type_conversion_candidate_p (tree);
-   bool pc_direct_rewrite_chance_p (tree, tree &);
-+  bool pc_simplify_chance_for_compress_p (gassign *, tree);
-+  bool compress_candidate_without_check (gimple_stmt_iterator *, tree, tree &);
-   bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
-   bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &);
-+  bool decompress_candidate_without_check (gimple_stmt_iterator *,
-+					   tree, tree, tree &, tree &);
-   bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
-   bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &,
- 			     tree &);
-@@ -1924,7 +1929,6 @@ bool
- ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
- 					HOST_WIDE_INT &times)
- {
--  bool ret = false;
-   gcc_assert (TREE_CODE (cst) == INTEGER_CST);
- 
-   gimple *stmt = gsi_stmt (*gsi);
-@@ -1948,27 +1952,95 @@ ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
-     {
-       if (gsi_one_before_end_p (*gsi))
- 	return false;
--      gsi_next (gsi);
--      gimple *stmt2 = gsi_stmt (*gsi);
--
--      if (gimple_code (stmt2) == GIMPLE_ASSIGN
--	  && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR)
-+      // Check uses.
-+      imm_use_iterator imm_iter_lhs;
-+      use_operand_p use_p_lhs;
-+      FOR_EACH_IMM_USE_FAST (use_p_lhs, imm_iter_lhs, gimple_assign_lhs (stmt))
- 	{
--	  tree lhs = gimple_assign_lhs (stmt2);
--	  tree rhs1 = gimple_assign_rhs1 (stmt2);
--	  if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
--	      || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type))
-+	  gimple *stmt2 = USE_STMT (use_p_lhs);
-+	  if (gimple_code (stmt2) != GIMPLE_ASSIGN)
-+	    continue;
-+	  if (gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR)
- 	    {
--	      tree num = NULL;
--	      if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type)))
-+	      tree lhs = gimple_assign_lhs (stmt2);
-+	      tree rhs1 = gimple_assign_rhs1 (stmt2);
-+	      if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
-+		  || types_compatible_p (inner_type (TREE_TYPE (lhs)),
-+					 ctype.type))
- 		{
--		  times = TREE_INT_CST_LOW (num);
--		  ret = true;
-+		  tree num = NULL;
-+		  if (is_result_of_mult (cst, &num,
-+					 TYPE_SIZE_UNIT (ctype.type)))
-+		    {
-+		      times = TREE_INT_CST_LOW (num);
-+		      return true;
-+		    }
-+		}
-+	    }
-+	  // For pointer compression, handle plus stmt.
-+	  else if (gimple_assign_rhs_code (stmt2) == PLUS_EXPR)
-+	    {
-+	      // Check uses.
-+	      imm_use_iterator imm_iter_cast;
-+	      use_operand_p use_p_cast;
-+	      FOR_EACH_IMM_USE_FAST (use_p_cast, imm_iter_cast,
-+				     gimple_assign_lhs (stmt2))
-+		{
-+		  gimple *stmt_cast = USE_STMT (use_p_cast);
-+		  if (gimple_code (stmt_cast) != GIMPLE_ASSIGN)
-+		    continue;
-+		  if (gimple_assign_cast_p (stmt_cast))
-+		    {
-+		      tree lhs_type = inner_type (TREE_TYPE (
-+					gimple_assign_lhs (stmt_cast)));
-+		      if (types_compatible_p (lhs_type, ctype.type))
-+			{
-+			  tree num = NULL;
-+			  if (is_result_of_mult (cst, &num,
-+						 TYPE_SIZE_UNIT (ctype.type)))
-+			    {
-+			      times = TREE_INT_CST_LOW (num);
-+			      return true;
-+			    }
-+			}
-+		    }
- 		}
- 	    }
- 	}
--      gsi_prev (gsi);
--      return ret;
-+    }
-+  // For pointer compression, handle div stmt.
-+  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR)
-+    {
-+      imm_use_iterator imm_iter;
-+      use_operand_p use_p;
-+      tree lhs = gimple_assign_lhs (stmt);
-+      if (lhs == NULL_TREE)
-+	return false;
-+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
-+	{
-+	  gimple *use_stmt = USE_STMT (use_p);
-+	  if (is_gimple_debug (use_stmt))
-+	    continue;
-+	  if (gimple_code (use_stmt) != GIMPLE_ASSIGN)
-+	    continue;
-+	  if (gimple_assign_cast_p (use_stmt))
-+	    {
-+	      tree lhs_type = inner_type (TREE_TYPE (
-+				gimple_assign_lhs (use_stmt)));
-+	      if (TYPE_UNSIGNED (lhs_type)
-+		  && TREE_CODE (lhs_type) == INTEGER_TYPE
-+		  && TYPE_PRECISION (lhs_type) == compressed_size)
-+		{
-+		  tree num = NULL;
-+		  if (is_result_of_mult (cst, &num,
-+					 TYPE_SIZE_UNIT (ctype.type)))
-+		    {
-+		      times = TREE_INT_CST_LOW (num);
-+		      return true;
-+		    }
-+		}

_service:tar_scm:0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch Deleted

@@ -1,550 +0,0 @@
-From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
-From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
-Date: Tue, 5 Dec 2023 15:33:08 +0800
-Subject: PATCH AutoBOLT Support saving feedback count info to ELF segment
- 1/3
-
----
- gcc/common.opt |   8 +
- gcc/final.cc   | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
- gcc/opts.cc    |  61 ++++++++
- 3 files changed, 473 insertions(+), 1 deletion(-)
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index b01df919e..e69947fc2 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -2546,6 +2546,14 @@ freorder-functions
- Common Var(flag_reorder_functions) Optimization
- Reorder functions to improve code placement.
- 
-+fauto-bolt
-+Common Var(flag_auto_bolt)
-+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
-+
-+fauto-bolt=
-+Common Joined RejectNegative
-+Specify the feedback data directory required by BOLT-plugin.  The default is the current directory.
-+
- frerun-cse-after-loop
- Common Var(flag_rerun_cse_after_loop) Optimization
- Add a common subexpression elimination pass after loop optimizations.
-diff --git a/gcc/final.cc b/gcc/final.cc
-index a9868861b..d4c4fa08f 100644
---- a/gcc/final.cc
-+++ b/gcc/final.cc
-@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "rtl-iter.h"
- #include "print-rtl.h"
- #include "function-abi.h"
-+#include "insn-codes.h"
- #include "common/common-target.h"
- 
- #ifdef XCOFF_DEBUGGING_INFO
-@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
-       }
- }
- #endif
--&#xc;
-+
-+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
-+
-+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
-+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
-+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
-+
-+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
-+
-+/* Return the relative offset address of the start instruction of BB,
-+   return -1 if it is empty instruction.    */
-+
-+static int 
-+get_bb_start_addr (basic_block bb)
-+{
-+  rtx_insn *insn;
-+  FOR_BB_INSNS (bb, insn)
-+    {
-+      if (!INSN_P (insn))
-+	{
-+	  continue;
-+	}
-+      /* The jump target of call is not in this function, so
-+	 it should be excluded.    */
-+      if (CALL_P (insn))
-+        {
-+	  return -1;
-+	}
-+
-+      int insn_code = recog_memoized (insn);
-+
-+      /* The instruction NOP in llvm-bolt belongs to the previous
-+	 BB, so it needs to be skipped.   */
-+      if (insn_code != CODE_FOR_nop)
-+        {
-+	  return INSN_ADDRESSES (INSN_UID (insn));
-+	}
-+    }
-+  return -1;
-+}
-+
-+/* Return the relative offet address of the end instruction of BB,
-+   return -1 if it is empty or call instruction.    */
-+
-+static int
-+get_bb_end_addr (basic_block bb)
-+{
-+  rtx_insn *insn;
-+  int num_succs = EDGE_COUNT (bb->succs);
-+  FOR_BB_INSNS_REVERSE (bb, insn)
-+    {
-+      if (!INSN_P (insn))
-+        {
-+	  continue;
-+	}
-+      /* The jump target of call is not in this function, so
-+	 it should be excluded.     */
-+      if (CALL_P (insn))
-+        {
-+	  return -1;
-+	}
-+      if ((num_succs == 1)
-+	   || ((num_succs == 2) && any_condjump_p (insn)))
-+	{
-+	  return INSN_ADDRESSES (INSN_UID (insn));
-+	}
-+      else
-+        {
-+	  return -1;
-+	}
-+    }
-+  return -1;
-+}
-+
-+/* Return the end address of cfun.    */
-+
-+static int 
-+get_function_end_addr ()
-+{
-+  rtx_insn *insn = get_last_insn ();
-+  for (; insn != get_insns (); insn = PREV_INSN (insn))
-+    {
-+      if (!INSN_P (insn))
-+        {
-+	  continue;
-+	}
-+      return INSN_ADDRESSES (INSN_UID (insn));
-+    }
-+	  
-+  return -1;
-+} 
-+
-+/* Return the function profile status string.    */
-+
-+static const char * 
-+get_function_profile_status () 
-+{
-+  const char *profile_status[] = {
-+    "PROFILE_ABSENT",
-+    "PROFILE_GUESSED",
-+    "PROFILE_READ",
-+    "PROFILE_LAST"     /* Last value, used by profile streaming.    */
-+  };
-+
-+  return profile_status[profile_status_for_fn (cfun)];
-+}
-+
-+/* Return the count from the feedback data, such as PGO or ADDO.    */
-+
-+inline static gcov_type 
-+get_fdo_count (profile_count count)
-+{
-+  return count.quality () >= GUESSED 
-+         ? count.to_gcov_type () : 0;
-+}
-+
-+/* Return the profile quality string.    */
-+
-+static const char *
-+get_fdo_count_quality (profile_count count)
-+{
-+  const char *profile_quality[] = {
-+    "UNINITIALIZED_PROFILE",
-+    "GUESSED_LOCAL",
-+    "GUESSED_GLOBAL0",
-+    "GUESSED_GLOBAL0_ADJUSTED",
-+    "GUESSED",
-+    "AFDO",
-+    "ADJUSTED",
-+    "PRECISE"
-+  };
-+
-+  return profile_quality[count.quality ()];
-+}
-+
-+static const char *
-+alias_local_functions (const char *fnname)
-+{
-+  if (TREE_PUBLIC (cfun->decl))
-+    {
-+      return fnname;
-+    }
-+  return concat (fnname, "/", lbasename (dump_base_name), NULL);
-+}
-+
-+/* Return function bind type string.    */
-+
-+static const char * 
-+simple_get_function_bind ()
-+{
-+  const char *function_bind[] = {

_service:tar_scm:0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch Deleted

@@ -1,34094 +0,0 @@
-From 82f9f48406955a6150def998b69b4eace4bd51eb Mon Sep 17 00:00:00 2001
-From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
-Date: Thu, 7 Dec 2023 11:43:08 +0800
-Subject: PATCH AutoBOLT Add bolt linker plugin 2/3
-
----
- bolt-plugin/Makefile       |   675 ++
- bolt-plugin/Makefile.am    |    43 +
- bolt-plugin/Makefile.in    |   675 ++
- bolt-plugin/aclocal.m4     | 10250 +++++++++++++++++
- bolt-plugin/bolt-plugin.cc |  1153 ++
- bolt-plugin/config.h.in    |   179 +
- bolt-plugin/configure      | 20909 +++++++++++++++++++++++++++++++++++
- bolt-plugin/configure.ac   |    60 +
- gcc/common.opt             |    16 +
- gcc/opts.cc                |    27 +-
- 10 files changed, 33985 insertions(+), 2 deletions(-)
- create mode 100644 bolt-plugin/Makefile
- create mode 100644 bolt-plugin/Makefile.am
- create mode 100644 bolt-plugin/Makefile.in
- create mode 100644 bolt-plugin/aclocal.m4
- create mode 100644 bolt-plugin/bolt-plugin.cc
- create mode 100644 bolt-plugin/config.h.in
- create mode 100755 bolt-plugin/configure
- create mode 100644 bolt-plugin/configure.ac
-
-diff --git a/bolt-plugin/Makefile b/bolt-plugin/Makefile
-new file mode 100644
-index 000000000..82a4bc2c6
---- /dev/null
-+++ b/bolt-plugin/Makefile
-@@ -0,0 +1,675 @@
-+# Makefile.in generated by automake 1.16.5 from Makefile.am.
-+# Makefile.  Generated from Makefile.in by configure.
-+
-+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
-+
-+# This Makefile.in is free software; the Free Software Foundation
-+# gives unlimited permission to copy and/or distribute it,
-+# with or without modifications, as long as this notice is preserved.
-+
-+# This program is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-+# PARTICULAR PURPOSE.
-+
-+
-+
-+
-+am__is_gnu_make = { \
-+  if test -z '$(MAKELEVEL)'; then \
-+    false; \
-+  elif test -n '$(MAKE_HOST)'; then \
-+    true; \
-+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
-+    true; \
-+  else \
-+    false; \
-+  fi; \
-+}
-+am__make_running_with_option = \
-+  case $${target_option-} in \
-+      ?) ;; \
-+      *) echo "am__make_running_with_option: internal error: invalid" \
-+              "target option '$${target_option-}' specified" >&2; \
-+         exit 1;; \
-+  esac; \
-+  has_opt=no; \
-+  sane_makeflags=$$MAKEFLAGS; \
-+  if $(am__is_gnu_make); then \
-+    sane_makeflags=$$MFLAGS; \
-+  else \
-+    case $$MAKEFLAGS in \
-+      *\\\ \	*) \
-+        bs=\\; \
-+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
-+          | sed "s/$$bs$$bs$$bs $$bs	*//g"`;; \
-+    esac; \
-+  fi; \
-+  skip_next=no; \
-+  strip_trailopt () \
-+  { \
-+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
-+  }; \
-+  for flg in $$sane_makeflags; do \
-+    test $$skip_next = yes && { skip_next=no; continue; }; \
-+    case $$flg in \
-+      *=*|--*) continue;; \
-+        -*I) strip_trailopt 'I'; skip_next=yes;; \
-+      -*I?*) strip_trailopt 'I';; \
-+        -*O) strip_trailopt 'O'; skip_next=yes;; \
-+      -*O?*) strip_trailopt 'O';; \
-+        -*l) strip_trailopt 'l'; skip_next=yes;; \
-+      -*l?*) strip_trailopt 'l';; \
-+      -dEDm) skip_next=yes;; \
-+      -JT) skip_next=yes;; \
-+    esac; \
-+    case $$flg in \
-+      *$$target_option*) has_opt=yes; break;; \
-+    esac; \
-+  done; \
-+  test $$has_opt = yes
-+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
-+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
-+pkgdatadir = $(datadir)/bolt-plugin
-+pkgincludedir = $(includedir)/bolt-plugin
-+pkglibdir = $(libdir)/bolt-plugin
-+pkglibexecdir = $(libexecdir)/bolt-plugin
-+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-+install_sh_DATA = $(install_sh) -c -m 644
-+install_sh_PROGRAM = $(install_sh) -c
-+install_sh_SCRIPT = $(install_sh) -c
-+INSTALL_HEADER = $(INSTALL_DATA)
-+transform = $(program_transform_name)
-+NORMAL_INSTALL = :
-+PRE_INSTALL = :
-+POST_INSTALL = :
-+NORMAL_UNINSTALL = :
-+PRE_UNINSTALL = :
-+POST_UNINSTALL = :
-+build_triplet = aarch64-unknown-linux-gnu
-+host_triplet = aarch64-unknown-linux-gnu
-+target_triplet = aarch64-unknown-linux-gnu
-+subdir = .
-+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
-+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-+	$(ACLOCAL_M4)
-+DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
-+	$(am__configure_deps)
-+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
-+ configure.lineno config.status.lineno
-+mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
-+CONFIG_HEADER = config.h
-+CONFIG_CLEAN_FILES =
-+CONFIG_CLEAN_VPATH_FILES =
-+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-+am__vpath_adj = case $$p in \
-+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
-+    *) f=$$p;; \
-+  esac;
-+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-+am__install_max = 40
-+am__nobase_strip_setup = \
-+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-+am__nobase_strip = \
-+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-+am__nobase_list = $(am__nobase_strip_setup); \
-+  for p in $$list; do echo "$$p $$p"; done | \
-+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
-+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
-+    if (++n[$$2] == $(am__install_max)) \
-+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
-+    END { for (dir in files) print dir, files[dir] }'
-+am__base_list = \
-+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
-+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-+am__uninstall_files_from_dir = { \
-+  test -z "$$files" \
-+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
-+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
-+         $(am__cd) "$$dir" && rm -f $$files; }; \
-+  }
-+am__installdirs = "$(DESTDIR)$(libexecsubdir)"
-+LTLIBRARIES = $(libexecsub_LTLIBRARIES)
-+am_libbolt_plugin_la_OBJECTS = bolt-plugin.lo
-+libbolt_plugin_la_OBJECTS = $(am_libbolt_plugin_la_OBJECTS)
-+AM_V_P = $(am__v_P_$(V))
-+am__v_P_ = $(am__v_P_$(AM_DEFAULT_VERBOSITY))
-+am__v_P_0 = false
-+am__v_P_1 = :
-+AM_V_GEN = $(am__v_GEN_$(V))
-+am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY))
-+am__v_GEN_0 = @echo "  GEN     " $@;
-+am__v_GEN_1 = 
-+AM_V_at = $(am__v_at_$(V))
-+am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY))
-+am__v_at_0 = @
-+am__v_at_1 = 
-+DEFAULT_INCLUDES = -I.
-+depcomp =
-+am__maybe_remake_depfiles =
-+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-+AM_V_lt = $(am__v_lt_$(V))
-+am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
-+am__v_lt_0 = --silent
-+am__v_lt_1 = 
-+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
-+	$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
-+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
-+	$(AM_CXXFLAGS) $(CXXFLAGS)
-+AM_V_CXX = $(am__v_CXX_$(V))
-+am__v_CXX_ = $(am__v_CXX_$(AM_DEFAULT_VERBOSITY))
-+am__v_CXX_0 = @echo "  CXX     " $@;
-+am__v_CXX_1 = 
-+CXXLD = $(CXX)
-+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
-+	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \

_service:tar_scm:0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch Deleted

@@ -1,345 +0,0 @@
-From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
-From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
-Date: Mon, 22 Jan 2024 15:38:24 +0800
-Subject: [PATCH] [AutoBOLT] Enable BOLT linker plugin on aarch64 [3/3]
-
----
- Makefile.def     | 10 ++++++++++
- configure        | 27 ++++++++++++++++++++++++++-
- configure.ac     | 22 +++++++++++++++++++++-
- gcc/config.host  |  1 +
- gcc/config.in    | 13 +++++++++++++
- gcc/configure    | 10 ++++++++--
- gcc/configure.ac |  4 ++++
- gcc/gcc.cc       | 23 +++++++++++++++++++++++
- 8 files changed, 106 insertions(+), 4 deletions(-)
-
-diff --git a/Makefile.def b/Makefile.def
-index 72d585496..0ba868890 100644
---- a/Makefile.def
-+++ b/Makefile.def
-@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
- host_modules= { module= lto-plugin; bootstrap=true;
- 		extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
- 		extra_make_flags='@extra_linker_plugin_flags@'; };
-+host_modules= { module= bolt-plugin; bootstrap=true;
-+		extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
-+		extra_make_flags='@extra_linker_plugin_flags@'; };
- host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
- host_modules= { module= gotools; };
- host_modules= { module= libctf; bootstrap=true; };
-@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
- dependencies = { module=configure-gcc; on=all-mpc; };
- dependencies = { module=configure-gcc; on=all-isl; };
- dependencies = { module=configure-gcc; on=all-lto-plugin; };
-+dependencies = { module=configure-gcc; on=all-bolt-plugin; };
- dependencies = { module=configure-gcc; on=all-binutils; };
- dependencies = { module=configure-gcc; on=all-gas; };
- dependencies = { module=configure-gcc; on=all-ld; };
-@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
- dependencies = { module=all-gcc; on=all-libiberty; };
- dependencies = { module=all-gcc; on=all-fixincludes; };
- dependencies = { module=all-gcc; on=all-lto-plugin; };
-+dependencies = { module=all-gcc; on=all-bolt-plugin; };
- dependencies = { module=all-gcc; on=all-libiconv; };
- dependencies = { module=info-gcc; on=all-build-libiberty; };
- dependencies = { module=dvi-gcc; on=all-build-libiberty; };
-@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
- dependencies = { module=html-gcc; on=all-build-libiberty; };
- dependencies = { module=install-gcc ; on=install-fixincludes; };
- dependencies = { module=install-gcc ; on=install-lto-plugin; };
-+dependencies = { module=install-gcc ; on=install-bolt-plugin; };
- dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
- dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
-+dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
- 
- dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
- dependencies = { module=configure-libcpp; on=configure-intl; };
-@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
- dependencies = { module=all-lto-plugin; on=all-libiberty; };
- dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
- 
-+dependencies = { module=all-bolt-plugin; on=all-libiberty; };
-+dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
-+
- dependencies = { module=configure-libcc1; on=configure-gcc; };
- dependencies = { module=all-libcc1; on=all-gcc; };
- 
-diff --git a/configure b/configure
-index 5dcaab14a..aff62c464 100755
---- a/configure
-+++ b/configure
-@@ -826,6 +826,7 @@ with_isl
- with_isl_include
- with_isl_lib
- enable_isl_version_check
-+enable_bolt
- enable_lto
- enable_linker_plugin_configure_flags
- enable_linker_plugin_flags
-@@ -1550,6 +1551,7 @@ Optional Features:
-                           enable the PGO build
-   --disable-isl-version-check
-                           disable check for isl version
-+  --enable-bolt           enable bolt optimization support
-   --enable-lto            enable link time optimization support
-   --enable-linker-plugin-configure-flags=FLAGS
-                           additional flags for configuring linker plugins
-@@ -8564,6 +8566,15 @@ fi
- 
- 
- 
-+# Check for BOLT support.
-+# Check whether --enable-bolt was given.
-+if test "${enable_bolt+set}" = set; then :
-+  enableval=$enable_bolt; enable_bolt=$enableval
-+else
-+  enable_bolt=no; default_enable_bolt=no
-+fi
-+
-+
- # Check for LTO support.
- # Check whether --enable-lto was given.
- if test "${enable_lto+set}" = set; then :
-@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
-   # ELF platforms build the lto-plugin always.
-   build_lto_plugin=yes
- 
-+  # ELF platforms can build the bolt-plugin.
-+  # NOT BUILD BOLT BY DEFAULT.
-+  case $target in
-+    aarch64*-*-linux*)
-+    if test $enable_bolt = yes; then :
-+      build_bolt_plugin=yes
-+    fi
-+    ;;
-+  esac
-+
- else
-   if test x"$default_enable_lto" = x"yes" ; then
-     case $target in
-@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
-     fi
-   fi
- 
-+  if test "${build_bolt_plugin}" = "yes" ; then
-+      configdirs="$configdirs bolt-plugin"
-+  fi
-+
-   # If we're building an offloading compiler, add the LTO front end.
-   if test x"$enable_as_accelerator_for" != x ; then
-     case ,${enable_languages}, in
-@@ -9202,7 +9227,7 @@ fi
- extra_host_libiberty_configure_flags=
- extra_host_zlib_configure_flags=
- case " $configdirs " in
--  *" lto-plugin "* | *" libcc1 "*)
-+  *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
-     # When these are to be built as shared libraries, the same applies to
-     # libiberty.
-     extra_host_libiberty_configure_flags=--enable-shared
-diff --git a/configure.ac b/configure.ac
-index 85977482a..f310d75ca 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -1863,6 +1863,12 @@ fi
- AC_SUBST(isllibs)
- AC_SUBST(islinc)
- 
-+# Check for BOLT support.
-+AC_ARG_ENABLE(bolt,
-+[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
-+enable_bolt=$enableval,
-+enable_bolt=no; default_enable_bolt=no)
-+
- # Check for LTO support.
- AC_ARG_ENABLE(lto,
- [AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
-@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
- 
- ACX_ELF_TARGET_IFELSE([# ELF platforms build the lto-plugin always.
-   build_lto_plugin=yes
-+
-+  # ELF platforms can build the bolt-plugin.
-+  # NOT BUILD BOLT BY DEFAULT.
-+  case $target in
-+    aarch64*-*-linux*)
-+    if test $enable_bolt = yes; then :
-+      build_bolt_plugin=yes
-+    fi
-+    ;;
-+  esac
- ],[if test x"$default_enable_lto" = x"yes" ; then
-     case $target in
-       *-apple-darwin[[912]]* | *-cygwin* | *-mingw* | *djgpp*) ;;
-@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
-     fi
-   fi
- 
-+  if test "${build_bolt_plugin}" = "yes" ; then
-+      configdirs="$configdirs bolt-plugin"
-+  fi
-+
-   # If we're building an offloading compiler, add the LTO front end.
-   if test x"$enable_as_accelerator_for" != x ; then
-     case ,${enable_languages}, in
-@@ -2457,7 +2477,7 @@ fi
- extra_host_libiberty_configure_flags=
- extra_host_zlib_configure_flags=
- case " $configdirs " in
--  *" lto-plugin "* | *" libcc1 "*)
-+  *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)    
-     # When these are to be built as shared libraries, the same applies to
-     # libiberty.
-     extra_host_libiberty_configure_flags=--enable-shared
-diff --git a/gcc/config.host b/gcc/config.host
-index 4ca300f11..bf7dcb4cc 100644
---- a/gcc/config.host
-+++ b/gcc/config.host
-@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o

_service:tar_scm:0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch Deleted

@@ -1,312 +0,0 @@
-From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
-From: zhenyu--zhao <zhaozhenyu17@huawei.com>
-Date: Sat, 23 Mar 2024 22:56:09 +0800
-Subject: [PATCH] [Autofdo]Enable discrimibator and MCF algorithm on Autofdo
-
----
- gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
- gcc/cfghooks.cc     |   7 ++
- gcc/opts.cc         |   5 +-
- gcc/tree-inline.cc  |  14 ++++
- 4 files changed, 193 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
-index 2b34b80b8..f45f0ec66 100644
---- a/gcc/auto-profile.cc
-+++ b/gcc/auto-profile.cc
-@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
-   if (name == NULL)
-     return -1;
-   string_index_map::const_iterator iter = map_.find (name);
-+  /* Function name may be duplicate.  Try to distinguish by the
-+     #file_name#function_name defined by the autofdo tool chain.  */
-+  if (iter == map_.end ())
-+    {
-+      char* file_name = get_original_name (lbasename (dump_base_name));
-+      char* file_func_name
-+	= concat ("#", file_name, "#", name, NULL);
-+      iter = map_.find (file_func_name);
-+      free (file_name);
-+      free (file_func_name);
-+    }
-   if (iter == map_.end ())
-     return -1;
- 
-@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
- 
-   for (unsigned i = 0; i < num_pos_counts; i++)
-     {
--      unsigned offset = gcov_read_unsigned () & 0xffff0000;
-+      unsigned offset = gcov_read_unsigned ();
-       unsigned num_targets = gcov_read_unsigned ();
-       gcov_type count = gcov_read_counter ();
-       s->pos_counts[offset].count = count;
-@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
-   function_instance *s = get_function_instance_by_inline_stack (stack);
-   if (s == NULL)
-     return false;
-+  if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
-+    {
-+      return true;
-+    }
-   return s->get_count_info (stack[0].second, info);
- }
- 
-@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
-     }
- }
- 
-+/* Handle the following case, where afdo_propagate () would
-+   invert the branch probability.  E.g.
-+   BB_NUM (sample count)
-+      BB1 (1000)
-+       /    \
-+    BB2 (10) BB3 (0)
-+      \       /
-+	BB4
-+   In afdo_propagate (), the count of BB3 is calculated as
-+   COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10).
-+   In fact, BB3 may be colder than BB2 by sample count.
-+   This function allocates the source BB count to each succ BB by
-+   sample rate, e.g.
-+   BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT))  */
-+
-+static void
-+afdo_preprocess_bb_count ()
-+{
-+  basic_block bb;
-+  FOR_ALL_BB_FN (bb, cfun)
-+    {
-+      if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
-+	  && bb->count > profile_count::zero ().afdo ())
-+	{
-+	  basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
-+	  basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
-+	  if (single_succ_edge (bb1) && single_succ_edge (bb2)
-+	      && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
-+	    {
-+	      gcov_type max_count = 0;
-+	      gcov_type total_count = 0;
-+	      edge e;
-+	      edge_iterator ei;
-+	      FOR_EACH_EDGE (e, ei, bb->succs)
-+		{
-+		  if (!e->dest->count.ipa_p ())
-+		    {
-+		      continue;
-+		    }
-+		  max_count = MAX (max_count, e->dest->count.to_gcov_type ());
-+		  total_count += e->dest->count.to_gcov_type ();
-+		}
-+	      /* Only when bb_count > max_count * 2 can the branch
-+		 probability be inverted.  */
-+	      if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
-+		{
-+		  FOR_EACH_EDGE (e, ei, bb->succs)
-+		    {
-+		      gcov_type target_count = bb->count.to_gcov_type ()
-+			* e->dest->count.to_gcov_type ()/ total_count;
-+		      e->dest->count
-+			= profile_count::from_gcov_type
-+			  (target_count).afdo ();
-+		    }
-+		}
-+	    }
-+	}
-+    }
-+}
-+
- /* Propagate counts on control flow graph and calculate branch
-    probabilities.  */
- 
-@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
-     }
- 
-   afdo_find_equiv_class (annotated_bb);
-+  afdo_preprocess_bb_count ();
-   afdo_propagate (annotated_bb);
- 
-   FOR_EACH_BB_FN (bb, cfun)
-@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
-   return false;
- }
- 
-+/* Preparation before executing MCF algorithm.  */
-+
-+static void
-+afdo_init_mcf ()
-+{
-+  basic_block bb;
-+  edge e;
-+  edge_iterator ei;
-+
-+  if (dump_file)
-+    {
-+      fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
-+    }
-+
-+  /* Step1: MCF requires BB ids to be continuous,
-+     so we need compact_blocks ().  */
-+  compact_blocks ();
-+
-+  /* Step2: allocate memory for MCF input data.  */
-+  bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
-+  edge_gcov_counts = new hash_map<edge, gcov_type>;
-+
-+  /* Step3: init MCF input data from cfg.  */
-+  FOR_ALL_BB_FN (bb, cfun)
-+    {
-+      /* Init BB count for MCF.  */
-+      bb_gcov_count (bb) = bb->count.to_gcov_type ();
-+
-+      gcov_type total_count = 0;
-+      FOR_EACH_EDGE (e, ei, bb->succs)
-+	{
-+	  total_count += e->dest->count.to_gcov_type ();
-+	}
-+
-+      /* If no successor block has any samples, the source BB samples
-+	 are allocated to each edge by static branch probability.  */
-+
-+      FOR_EACH_EDGE (e, ei, bb->succs)
-+	{
-+	  if (total_count == 0)
-+	    {
-+	      edge_gcov_count (e) = e->src->count.to_gcov_type ()
-+		* e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
-+	    }
-+	  else
-+	    {
-+	      edge_gcov_count (e) = e->src->count.to_gcov_type ()
-+		* e->dest->count.to_gcov_type () / total_count;
-+	    }
-+	}
-+    }
-+}
-+
-+
-+/* Free the resources used by MCF and reset BB count from MCF result.
-+   branch probability has been updated in mcf_smooth_cfg ().  */
-+
-+static void
-+afdo_process_after_mcf ()
-+{
-+  basic_block bb;
-+  /* Reset BB count from MCF result.  */
-+  FOR_EACH_BB_FN (bb, cfun)
-+    {
-+      if (bb_gcov_count (bb))
-+	{

_service:tar_scm:0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch Deleted

@@ -1,194 +0,0 @@
-From aa39a66f6029fe16a656d7c6339908b953fb1e04 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
-Date: Thu, 22 Feb 2024 11:27:43 +0300
-Subject: [PATCH 01/18] Add insn defs and correct costs for cmlt generation
-
----
- gcc/config/aarch64/aarch64-simd.md  | 48 +++++++++++++++++++++++++++++
- gcc/config/aarch64/aarch64.cc       | 15 +++++++++
- gcc/config/aarch64/aarch64.opt      |  4 +++
- gcc/config/aarch64/iterators.md     |  3 +-
- gcc/config/aarch64/predicates.md    | 25 +++++++++++++++
- gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++
- 6 files changed, 114 insertions(+), 1 deletion(-)
- create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c
-
-diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
-index ee7f0b89c..82f73805f 100644
---- a/gcc/config/aarch64/aarch64-simd.md
-+++ b/gcc/config/aarch64/aarch64-simd.md
-@@ -6454,6 +6454,54 @@
-   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
- )
- 
-+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
-+;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
-+;; TODO: maybe extend to scalar operations or other cm** instructions.
-+
-+(define_insn "*aarch64_cmlt_as_arith<mode>"
-+  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
-+	(minus:<V_INT_EQUIV>
-+	  (ashift:<V_INT_EQUIV>
-+	    (and:<V_INT_EQUIV>
-+	      (lshiftrt:<V_INT_EQUIV>
-+		(match_operand:VDQHSD 1 "register_operand" "w")
-+		(match_operand:VDQHSD 2 "half_size_minus_one_operand"))
-+	      (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
-+	    (match_operand:VDQHSD 4 "half_size_operand"))
-+	  (and:<V_INT_EQUIV>
-+	    (lshiftrt:<V_INT_EQUIV>
-+	      (match_dup 1)
-+	      (match_dup 2))
-+	    (match_dup 3))))]
-+  "TARGET_SIMD && flag_cmlt_arith"
-+  "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
-+  [(set_attr "type" "neon_compare_zero")]
-+)
-+
-+;; The helper definition that allows combiner to use the previous pattern.
-+
-+(define_insn_and_split "*arch64_cmlt_tmp<mode>"
-+  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
-+	(and:<V_INT_EQUIV>
-+	  (lshiftrt:<V_INT_EQUIV>
-+	    (match_operand:VDQHSD 1 "register_operand" "w")
-+	    (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
-+	  (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
-+  "TARGET_SIMD && flag_cmlt_arith"
-+  "#"
-+  "&& reload_completed"
-+  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
-+	(lshiftrt:<V_INT_EQUIV>
-+	  (match_operand:VDQHSD 1 "register_operand")
-+	  (match_operand:VDQHSD 2 "half_size_minus_one_operand")))
-+   (set (match_dup 0)
-+	(and:<V_INT_EQUIV>
-+	  (match_dup 0)
-+	  (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
-+  ""
-+  [(set_attr "type" "neon_compare_zero")]
-+)
-+
- (define_insn_and_split "aarch64_cm<optab>di"
-   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
- 	(neg:DI
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index a3da4ca30..04072ca25 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -14064,6 +14064,21 @@ cost_minus:
- 	    return true;
- 	  }
- 
-+	/* Detect aarch64_cmlt_as_arith instruction. Now only this pattern
-+	   matches the condition. The costs of cmlt and sub instructions
-+	   are comparable, so we are not increasing the cost here.  */
-+	if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT
-+	    && GET_CODE (op1) == AND)
-+	  {
-+	    rtx op0_subop0 = XEXP (op0, 0);
-+	    if (rtx_equal_p (op0_subop0, op1))
-+	      {
-+		rtx lshrt_op = XEXP (op0_subop0, 0);
-+		if (GET_CODE (lshrt_op) == LSHIFTRT)
-+		  return true;
-+	      }
-+	  }
-+
- 	/* Look for SUB (extended register).  */
- 	if (is_a <scalar_int_mode> (mode)
- 	    && aarch64_rtx_arith_op_extract_p (op1))
-diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
-index a64b927e9..101664c7c 100644
---- a/gcc/config/aarch64/aarch64.opt
-+++ b/gcc/config/aarch64/aarch64.opt
-@@ -262,6 +262,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0.
- This option is for use with fstack-protector-strong and not for use in
- user-land code.
- 
-+mcmlt-arith
-+Target Var(flag_cmlt_arith) Optimization Init(0)
-+Use SIMD cmlt instruction to perform some arithmetic/logic calculations.
-+
- TargetVariable
- long aarch64_stack_protector_guard_offset = 0
- 
-diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
-index 26a840d7f..967e6b0b1 100644
---- a/gcc/config/aarch64/iterators.md
-+++ b/gcc/config/aarch64/iterators.md
-@@ -1485,7 +1485,8 @@
- 			  (V2DI "2s")])
- 
- ;; Register suffix narrowed modes for VQN.
--(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
-+(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h")
-+			   (V8HI "16b") (V4SI "8h")
- 			   (V2DI "4s")])
- 
- ;; Widened modes of vector modes.
-diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
-index c308015ac..07c14aacb 100644
---- a/gcc/config/aarch64/predicates.md
-+++ b/gcc/config/aarch64/predicates.md
-@@ -49,6 +49,31 @@
-   return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3);
- })
- 
-+(define_predicate "half_size_minus_one_operand"
-+  (match_code "const_vector")
-+{
-+  op = unwrap_const_vec_duplicate (op);
-+  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
-+  return CONST_INT_P (op) && (UINTVAL (op) == size - 1);
-+})
-+
-+(define_predicate "half_size_operand"
-+  (match_code "const_vector")
-+{
-+  op = unwrap_const_vec_duplicate (op);
-+  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
-+  return CONST_INT_P (op) && (UINTVAL (op) == size);
-+})
-+
-+(define_predicate "cmlt_arith_mask_operand"
-+  (match_code "const_vector")
-+{
-+  op = unwrap_const_vec_duplicate (op);
-+  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
-+  unsigned long long mask = ((unsigned long long) 1 << size) | 1;
-+  return CONST_INT_P (op) && (UINTVAL (op) == mask);
-+})
-+
- (define_predicate "subreg_lowpart_operator"
-   (ior (match_code "truncate")
-        (and (match_code "subreg")
-diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c
-new file mode 100755
-index 000000000..b4c9a37ff
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/combine-cmlt.c
-@@ -0,0 +1,20 @@
-+/* { dg-do compile { target aarch64-*-* } } */
-+/* { dg-options "-O3 -mcmlt-arith" } */
-+
-+/* The test checks usage of cmlt insns for arithmetic/logic calculations
-+ * in foo ().  It's inspired by sources of x264 codec.  */
-+
-+typedef unsigned short int uint16_t;
-+typedef unsigned int uint32_t;
-+
-+void foo( uint32_t *a, uint32_t *b)
-+{
-+  for (unsigned i = 0; i < 4; i++)
-+    {
-+      uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
-+		    &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
-+      b[i] = (a[i]+s)^s;
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {cmlt\t} 1 } }  */
--- 
-2.33.0
-

_service:tar_scm:0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch Deleted

@@ -1,560 +0,0 @@
-From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
-From: vchernon <chernonog.vyacheslav@huawei.com>
-Date: Wed, 28 Feb 2024 23:05:12 +0800
-Subject: [PATCH 02/18] rtl-ifcvt introduce rtl ifcvt enchancements     new
- option:       -fifcvt-allow-complicated-cmps:         allows ifcvt to deal
- with complicated cmps like
-
-        cmp reg1 (reg2 + reg3)
-
-        can increase compilation time
-    new param:
-      -param=ifcvt-allow-register-renaming=0,1,2
-        1 : allows ifcvt to rename registers in then and else bb
-        2 : allows to rename registers in condition and else/then bb
-        can increase compilation time and register pressure
----
- gcc/common.opt                                |   4 +
- gcc/ifcvt.cc                                  | 291 +++++++++++++++---
- gcc/params.opt                                |   4 +
- .../gcc.c-torture/execute/ifcvt-renaming-1.c  |  35 +++
- gcc/testsuite/gcc.dg/ifcvt-6.c                |  27 ++
- 5 files changed, 311 insertions(+), 50 deletions(-)
- create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
- create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index c7c6bc256..aa00fb7b0 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -3691,4 +3691,8 @@ fipa-ra
- Common Var(flag_ipa_ra) Optimization
- Use caller save register across calls if possible.
- 
-+fifcvt-allow-complicated-cmps
-+Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
-+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
-+
- ; This comment is to ensure we retain the blank line above.
-diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
-index 2c1eba312..584db7b55 100644
---- a/gcc/ifcvt.cc
-+++ b/gcc/ifcvt.cc
-@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
-     }
- 
-   /* Don't even try if the comparison operands or the mode of X are weird.  */
--  if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
-+  if (!flag_ifcvt_allow_complicated_cmps
-+      && (cond_complex
-+	  || !SCALAR_INT_MODE_P (GET_MODE (x))))
-     return NULL_RTX;
- 
-   return emit_store_flag (x, code, XEXP (cond, 0),
-@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
-   /* Currently support only simple single sets in test_bb.  */
-   if (!sset
-       || !noce_operand_ok (SET_DEST (sset))
--      || contains_ccmode_rtx_p (SET_DEST (sset))
-+      || (!flag_ifcvt_allow_complicated_cmps
-+	  && contains_ccmode_rtx_p (SET_DEST (sset)))
-       || !noce_operand_ok (SET_SRC (sset)))
-     return false;
- 
-@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
-    in this function.  */
- 
- static bool
--bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
-+bbs_ok_for_cmove_arith (basic_block bb_a,
-+			basic_block bb_b,
-+			rtx to_rename,
-+			bitmap conflict_regs)
- {
-   rtx_insn *a_insn;
-   bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
--
-+  bitmap intersections = BITMAP_ALLOC (&reg_obstack);
-   df_ref def;
-   df_ref use;
-+  rtx_insn *last_a = last_active_insn (bb_a, FALSE);
- 
-   FOR_BB_INSNS (bb_a, a_insn)
-     {
-@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
-       rtx sset_a = single_set (a_insn);
- 
-       if (!sset_a)
--	{
--	  BITMAP_FREE (bba_sets);
--	  return false;
--	}
-+	goto end_cmove_arith_check_and_fail;
-       /* Record all registers that BB_A sets.  */
-       FOR_EACH_INSN_DEF (def, a_insn)
--	if (!(to_rename && DF_REF_REG (def) == to_rename))
-+	if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
- 	  bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
-     }
- 
-+  bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
-   rtx_insn *b_insn;
--
-   FOR_BB_INSNS (bb_b, b_insn)
-     {
-       if (!active_insn_p (b_insn))
-@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
-       rtx sset_b = single_set (b_insn);
- 
-       if (!sset_b)
--	{
--	  BITMAP_FREE (bba_sets);
--	  return false;
--	}
-+	goto end_cmove_arith_check_and_fail;
- 
-       /* Make sure this is a REG and not some instance
- 	 of ZERO_EXTRACT or SUBREG or other dangerous stuff.
-@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
-       if (MEM_P (SET_DEST (sset_b)))
- 	gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
-       else if (!REG_P (SET_DEST (sset_b)))
--	{
--	  BITMAP_FREE (bba_sets);
--	  return false;
--	}
-+	goto end_cmove_arith_check_and_fail;
- 
--      /* If the insn uses a reg set in BB_A return false.  */
-+      /* If the insn uses a reg set in BB_A return false
-+	 or try to collect register list for renaming.  */
-       FOR_EACH_INSN_USE (use, b_insn)
- 	{
--	  if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
-+	  if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
- 	    {
--	      BITMAP_FREE (bba_sets);
--	      return false;
-+	      if (param_ifcvt_allow_register_renaming < 1)
-+		  goto end_cmove_arith_check_and_fail;
-+
-+	      /* Those regs should be renamed.  We can't rename CC reg, but
-+		 possibly we can provide combined comparison in the future.  */
-+	      if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
-+		goto end_cmove_arith_check_and_fail;
-+	      bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
- 	    }
- 	}
--
-     }
- 
-   BITMAP_FREE (bba_sets);
-+  BITMAP_FREE (intersections);
-   return true;
-+
-+end_cmove_arith_check_and_fail:
-+  BITMAP_FREE (bba_sets);
-+  BITMAP_FREE (intersections);
-+  return false;
- }
- 
- /* Emit copies of all the active instructions in BB except the last.
-@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
-   return true;
- }
- 
-+/* This function tries to rename regs that intersect with considered bb
-+   inside condition expression.  Condition expression will be moved down
-+   if the optimization will be applied, so it is essential to be sure that
-+   all intersected registers will be renamed otherwise transformation
-+   can't be applied.  Function returns true if renaming was successful
-+   and optimization can proceed further.  */
-+
-+static bool
-+noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
-+{
-+  bool success = true;
-+  if (bitmap_empty_p (cond_rename_regs))
-+    return true;
-+  if (param_ifcvt_allow_register_renaming < 2)
-+    return false;
-+  df_ref use;
-+  rtx_insn *cmp_insn = if_info->cond_earliest;
-+  /* A jump instruction as the condition is currently unsupported.  */
-+  if (JUMP_P (cmp_insn))
-+    return false;
-+  rtx_insn *before_cmp = PREV_INSN (cmp_insn);
-+  start_sequence ();
-+  rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
-+  basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
-+  FOR_EACH_INSN_USE (use, cmp_insn)
-+    {
-+      if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
-+	{
-+	  rtx use_reg = DF_REF_REG (use);
-+	  rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
-+	  if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
-+	    {
-+	      end_sequence ();
-+	      return false;

_service:tar_scm:0037-Perform-early-if-conversion-of-simple-arithmetic.patch Deleted

@@ -1,109 +0,0 @@
-From 310eade1450995b55d9f8120561022fbf164b2ec Mon Sep 17 00:00:00 2001
-From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
-Date: Thu, 12 Jan 2023 14:52:49 +0300
-Subject: [PATCH 03/18] Perform early if-conversion of simple arithmetic
-
----
- gcc/common.opt                      |  4 ++++
- gcc/match.pd                        | 25 +++++++++++++++++++
- gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++
- 3 files changed, 66 insertions(+)
- create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index aa00fb7b0..dac477c04 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1821,6 +1821,10 @@ fif-conversion2
- Common Var(flag_if_conversion2) Optimization
- Perform conversion of conditional jumps to conditional execution.
- 
-+fif-conversion-gimple
-+Common Var(flag_if_conversion_gimple) Optimization
-+Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
-+
- fstack-reuse=
- Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
- -fstack-reuse=all|named_vars|none	Set stack reuse level for local variables.
-diff --git a/gcc/match.pd b/gcc/match.pd
-index 6f24d5079..3cbaf2a5b 100644
---- a/gcc/match.pd
-+++ b/gcc/match.pd
-@@ -4278,6 +4278,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
-   )
-  )
- )
-+
-+(if (flag_if_conversion_gimple)
-+ (for simple_op (plus minus bit_and bit_ior bit_xor)
-+  (simplify
-+   (cond @0 (simple_op @1 INTEGER_CST@2) @1)
-+   (switch
-+    /* a = cond ? a + 1 : a -> a = a + ((int) cond) */
-+    (if (integer_onep (@2))
-+     (simple_op @1 (convert (convert:boolean_type_node @0))))
-+    /* a = cond ? a + powerof2cst : a ->
-+       a = a + ((int) cond) << log2 (powerof2cst) */
-+    (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
-+     (with
-+      {
-+	tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
-+      }
-+      (simple_op @1 (lshift (convert (convert:boolean_type_node @0))
-+			    { shift; })
-+      )
-+     )
-+    )
-+   )
-+  )
-+ )
-+)
- #endif
- 
- #if GIMPLE
-diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
-new file mode 100644
-index 000000000..0f7c87e5c
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
-@@ -0,0 +1,37 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
-+
-+int test_int (int optimizable_int) {
-+    if (optimizable_int > 5)
-+	++optimizable_int;
-+    return optimizable_int;
-+}
-+
-+int test_int_pow2 (int optimizable_int_pow2) {
-+    if (optimizable_int_pow2 <= 4)
-+	optimizable_int_pow2 += 1024;
-+    return optimizable_int_pow2;
-+}
-+
-+int test_int_non_pow2 (int not_optimizable_int_non_pow2) {
-+    if (not_optimizable_int_non_pow2 == 1)
-+	not_optimizable_int_non_pow2 += 513;
-+    return not_optimizable_int_non_pow2;
-+}
-+
-+float test_float (float not_optimizable_float) {
-+    if (not_optimizable_float > 5)
-+	not_optimizable_float += 1;
-+    return not_optimizable_float;
-+}
-+
-+/* Expecting if-else block in test_float and test_int_non_pow2 only. */
-+/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */
-+/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */
-+/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */
-+/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */
-+/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */
-+
-+/* Expecting shifted result only for optimizable_int_pow2. */
-+/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */
-+/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */
--- 
-2.33.0
-

_service:tar_scm:0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch Deleted

@@ -1,252 +0,0 @@
-From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
-From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
-Date: Tue, 24 Jan 2023 16:43:40 +0300
-Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
- ops too.
-
----
- gcc/common.opt                 |   5 ++
- gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
- gcc/tree-ssa-math-opts.cc      |  43 ++++++++--
- 3 files changed, 184 insertions(+), 7 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index dac477c04..39c90604e 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -3106,6 +3106,11 @@ freciprocal-math
- Common Var(flag_reciprocal_math) SetByCombined Optimization
- Same as -fassociative-math for expressions which include division.
- 
-+fuaddsub-overflow-match-all
-+Common Var(flag_uaddsub_overflow_match_all)
-+Match unsigned add/sub overflow even if the target does not support
-+the corresponding instruction.
-+
- ; Nonzero means that unsafe floating-point math optimizations are allowed
- ; for the sake of speed.  IEEE compliance is not guaranteed, and operations
- ; are allowed to assume that their arguments and results are "normal"
-diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
-new file mode 100644
-index 000000000..96c26d308
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/uaddsub.c
-@@ -0,0 +1,143 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
-+#include <stdint.h>
-+
-+typedef unsigned __int128 uint128_t;
-+typedef struct uint256_t
-+{
-+  uint128_t lo;
-+  uint128_t hi;
-+} uint256_t;
-+
-+uint16_t add16 (uint8_t a, uint8_t b)
-+{
-+  uint8_t tmp = a + b;
-+  uint8_t overflow = 0;
-+  if (tmp < a)
-+    overflow = 1;
-+
-+  uint16_t res = overflow;
-+  res <<= 8;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint32_t add32 (uint16_t a, uint16_t b)
-+{
-+  uint16_t tmp = a + b;
-+  uint16_t overflow = 0;
-+  if (tmp < a)
-+    overflow = 1;
-+
-+  uint32_t res = overflow;
-+  res <<= 16;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint64_t add64 (uint32_t a, uint32_t b)
-+{
-+  uint32_t tmp = a + b;
-+  uint32_t overflow = 0;
-+  if (tmp < a)
-+    overflow = 1;
-+
-+  uint64_t res = overflow;
-+  res <<= 32;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint128_t add128 (uint64_t a, uint64_t b)
-+{
-+  uint64_t tmp = a + b;
-+  uint64_t overflow = 0;
-+  if (tmp < a)
-+    overflow = 1;
-+
-+  uint128_t res = overflow;
-+  res <<= 64;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint256_t add256 (uint128_t a, uint128_t b)
-+{
-+  uint128_t tmp = a + b;
-+  uint128_t overflow = 0;
-+  if (tmp < a)
-+    overflow = 1;
-+
-+  uint256_t res;
-+  res.hi = overflow;
-+  res.lo = tmp;
-+  return res;
-+}
-+
-+uint16_t sub16 (uint8_t a, uint8_t b)
-+{
-+  uint8_t tmp = a - b;
-+  uint8_t overflow = 0;
-+  if (tmp > a)
-+    overflow = -1;
-+
-+  uint16_t res = overflow;
-+  res <<= 8;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint32_t sub32 (uint16_t a, uint16_t b)
-+{
-+  uint16_t tmp = a - b;
-+  uint16_t overflow = 0;
-+  if (tmp > a)
-+    overflow = -1;
-+
-+  uint32_t res = overflow;
-+  res <<= 16;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint64_t sub64 (uint32_t a, uint32_t b)
-+{
-+  uint32_t tmp = a - b;
-+  uint32_t overflow = 0;
-+  if (tmp > a)
-+    overflow = -1;
-+
-+  uint64_t res = overflow;
-+  res <<= 32;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint128_t sub128 (uint64_t a, uint64_t b)
-+{
-+  uint64_t tmp = a - b;
-+  uint64_t overflow = 0;
-+  if (tmp > a)
-+    overflow = -1;
-+
-+  uint128_t res = overflow;
-+  res <<= 64;
-+  res += tmp;
-+  return res;
-+}
-+
-+uint256_t sub256 (uint128_t a, uint128_t b)
-+{
-+  uint128_t tmp = a - b;
-+  uint128_t overflow = 0;
-+  if (tmp > a)
-+    overflow = -1;
-+
-+  uint256_t res;
-+  res.hi = overflow;
-+  res.lo = tmp;
-+  return res;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
-+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
-diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
-index 232e903b0..55d6ee8ae 100644
---- a/gcc/tree-ssa-math-opts.cc
-+++ b/gcc/tree-ssa-math-opts.cc
-@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
-     }
- }
- 
-+/* Check if the corresponding operation has wider equivalent on the target.  */
-+
-+static bool
-+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
-+{
-+  machine_mode wider_mode;
-+  FOR_EACH_WIDER_MODE (wider_mode, mode)
-+    {
-+      machine_mode next_mode;
-+      if (optab_handler (op, wider_mode) != CODE_FOR_nothing
-+	  || (op == smul_optab
-+	      && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
-+	      && (find_widening_optab_handler ((unsignedp

_service:tar_scm:0039-Match-double-sized-mul-pattern.patch Deleted

@@ -1,488 +0,0 @@
-From e7b22f97f960b62e555dfd6f2e3ae43973fcbb3e Mon Sep 17 00:00:00 2001
-From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
-Date: Wed, 25 Jan 2023 15:04:07 +0300
-Subject: [PATCH 05/18] Match double sized mul pattern
-
----
- gcc/match.pd                              | 136 +++++++++++++++++++++
- gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++
- gcc/testsuite/gcc.dg/double_sized_mul-2.c |  62 ++++++++++
- gcc/tree-ssa-math-opts.cc                 |  80 ++++++++++++
- 4 files changed, 419 insertions(+)
- create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c
- create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c
-
-diff --git a/gcc/match.pd b/gcc/match.pd
-index 3cbaf2a5b..61866cb90 100644
---- a/gcc/match.pd
-+++ b/gcc/match.pd
-@@ -7895,3 +7895,139 @@ and,
- 	       == TYPE_UNSIGNED (TREE_TYPE (@3))))
-        && single_use (@4)
-        && single_use (@5))))
-+
-+/* Match multiplication with double sized result.
-+
-+   Consider the following calculations:
-+   arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo)
-+	       * (2^(bit_size/2) * arg1_hi + arg1_lo)
-+   arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
-+	       + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi)
-+	       + arg0_lo * arg1_lo
-+
-+   The products of high and low parts fits in bit_size values, thus they are
-+   placed in high and low parts of result respectively.
-+
-+   The sum of the mixed products may overflow, so we need a detection for that.
-+   Also it has a bit_size/2 offset, thus it intersects with both high and low
-+   parts of result.  Overflow detection constant is bit_size/2 due to this.
-+
-+   With this info:
-+   arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
-+	       + 2^(bit_size/2) * middle
-+	       + 2^bit_size * possible_middle_overflow
-+	       + arg0_lo * arg1_lo
-+   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow)
-+	       + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo)
-+	       + arg0_lo * arg1_lo
-+   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi
-+	       +	       possible_middle_overflow)
-+	       + 2^(bit_size/2) * middle_lo
-+	       + arg0_lo * arg1_lo
-+
-+   The last sum can produce overflow for the high result part.  With this:
-+   arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow
-+	       +	       possible_res_lo_overflow + middle_hi)
-+	       + res_lo
-+	       = res_hi + res_lo
-+
-+   This formula is quite big to fit into one match pattern with all of the
-+   combinations of terms inside it.  There are many helpers for better code
-+   readability.
-+
-+   The simplification basis is res_hi: assuming that res_lo only is not
-+   real practical case for such calculations.
-+
-+   Overflow handling is done via matching complex calculations:
-+   the realpart and imagpart are quite handy here.  */
-+/* Match low and high parts of the argument.  */
-+(match (double_size_mul_arg_lo @0 @1)
-+ (bit_and @0 INTEGER_CST@1)
-+  (if (wi::to_wide (@1)
-+       == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type)))))
-+(match (double_size_mul_arg_hi @0 @1)
-+ (rshift @0 INTEGER_CST@1)
-+  (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2)))
-+
-+/* Match various argument parts products.  */
-+(match (double_size_mul_lolo @0 @1)
-+ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3))
-+  (if (single_use (@4))))
-+(match (double_size_mul_hihi @0 @1)
-+ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3))
-+  (if (single_use (@4))))
-+(match (double_size_mul_lohi @0 @1)
-+ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3))
-+  (if (single_use (@4))))
-+
-+/* Match complex middle sum.  */
-+(match (double_size_mul_middle_complex @0 @1)
-+ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0))
-+  (if (num_imm_uses (@2) == 2)))
-+
-+/* Match real middle results.  */
-+(match (double_size_mul_middle @0 @1)
-+ (realpart@2 (double_size_mul_middle_complex @0 @1))
-+  (if (num_imm_uses (@2) == 2)))
-+(match (double_size_mul_middleres_lo @0 @1)
-+ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
-+  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
-+       && single_use (@3))))
-+(match (double_size_mul_middleres_hi @0 @1)
-+ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
-+  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
-+       && single_use (@3))))
-+
-+/* Match low result part.  */
-+/* Number of uses may be < 2 in case when we are interested in
-+   high part only.  */
-+(match (double_size_mul_res_lo_complex @0 @1)
-+ (IFN_ADD_OVERFLOW:c@2
-+  (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1))
-+  (if (num_imm_uses (@2) <= 2)))
-+(match (double_size_mul_res_lo @0 @1)
-+ (realpart (double_size_mul_res_lo_complex @0 @1)))
-+
-+/* Match overflow terms.  */
-+(match (double_size_mul_overflow_check_lo @0 @1 @5)
-+ (convert@4 (ne@3
-+  (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop))
-+  (if (single_use (@2) && single_use (@3) && single_use (@4))))
-+(match (double_size_mul_overflow_check_hi @0 @1)
-+ (lshift@6 (convert@5 (ne@4
-+  (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop))
-+	   INTEGER_CST@2)
-+  (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
-+       && single_use (@3) && single_use (@4) && single_use (@5)
-+       && single_use (@6))))
-+
-+/* Match all possible permutations for high result part calculations.  */
-+(for op1 (double_size_mul_hihi
-+	  double_size_mul_overflow_check_hi
-+	  double_size_mul_middleres_hi)
-+     op2 (double_size_mul_overflow_check_hi
-+	  double_size_mul_middleres_hi
-+	  double_size_mul_hihi)
-+     op3 (double_size_mul_middleres_hi
-+	  double_size_mul_hihi
-+	  double_size_mul_overflow_check_hi)
-+ (match (double_size_mul_candidate @0 @1 @2 @3)
-+  (plus:c@2
-+   (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1))
-+   (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))
-+    (if (single_use (@4) && single_use (@5))))
-+ (match (double_size_mul_candidate @0 @1 @2 @3)
-+  (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3)
-+   (plus:c@4 (op1:c @0 @1)
-+    (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
-+     (if (single_use (@4) && single_use (@5))))
-+ (match (double_size_mul_candidate @0 @1 @2 @3)
-+  (plus:c@2 (op1:c @0 @1)
-+   (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3)
-+    (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
-+     (if (single_use (@4) && single_use (@5))))
-+ (match (double_size_mul_candidate @0 @1 @2 @3)
-+  (plus:c@2 (op1:c @0 @1)
-+   (plus:c@4 (op2:c @0 @1)
-+    (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
-+     (if (single_use (@4) && single_use (@5)))))
-diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
-new file mode 100644
-index 000000000..4d475cc8a
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
-@@ -0,0 +1,141 @@
-+/* { dg-do compile } */
-+/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
-+   proper overflow detection in some cases.  */
-+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
-+#include <stdint.h>
-+
-+typedef unsigned __int128 uint128_t;
-+
-+uint16_t mul16 (uint8_t a, uint8_t b)
-+{
-+  uint8_t a_lo = a & 0xF;
-+  uint8_t b_lo = b & 0xF;
-+  uint8_t a_hi = a >> 4;
-+  uint8_t b_hi = b >> 4;
-+  uint8_t lolo = a_lo * b_lo;
-+  uint8_t lohi = a_lo * b_hi;
-+  uint8_t hilo = a_hi * b_lo;
-+  uint8_t hihi = a_hi * b_hi;
-+  uint8_t middle = hilo + lohi;
-+  uint8_t middle_hi = middle >> 4;
-+  uint8_t middle_lo = middle << 4;
-+  uint8_t res_lo = lolo + middle_lo;
-+  uint8_t res_hi = hihi + middle_hi;
-+  res_hi += (res_lo < middle_lo ? 1 : 0);
-+  res_hi += (middle < hilo ? 0x10 : 0);
-+  uint16_t res = ((uint16_t) res_hi) << 8;
-+  res += res_lo;
-+  return res;
-+}
-+
-+uint32_t mul32 (uint16_t a, uint16_t b)
-+{
-+  uint16_t a_lo = a & 0xFF;
-+  uint16_t b_lo = b & 0xFF;
-+  uint16_t a_hi = a >> 8;

_service:tar_scm:0040-Port-icp-patch-to-GCC-12.patch Deleted

@@ -1,2387 +0,0 @@
-From b73462757734c62f64e7a4379340679ec6f19669 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Tue, 27 Feb 2024 07:28:12 +0800
-Subject: [PATCH 06/18] Port icp patch to GCC 12
-
----
- gcc/common.opt              |    8 +
- gcc/dbgcnt.def              |    1 +
- gcc/ipa-devirt.cc           | 1855 +++++++++++++++++++++++++++++++++++
- gcc/passes.def              |    1 +
- gcc/testsuite/gcc.dg/icp1.c |   40 +
- gcc/testsuite/gcc.dg/icp2.c |   38 +
- gcc/testsuite/gcc.dg/icp3.c |   52 +
- gcc/testsuite/gcc.dg/icp4.c |   55 ++
- gcc/testsuite/gcc.dg/icp5.c |   66 ++
- gcc/testsuite/gcc.dg/icp6.c |   66 ++
- gcc/testsuite/gcc.dg/icp7.c |   48 +
- gcc/timevar.def             |    1 +
- gcc/tree-pass.h             |    1 +
- 13 files changed, 2232 insertions(+)
- create mode 100644 gcc/testsuite/gcc.dg/icp1.c
- create mode 100644 gcc/testsuite/gcc.dg/icp2.c
- create mode 100644 gcc/testsuite/gcc.dg/icp3.c
- create mode 100644 gcc/testsuite/gcc.dg/icp4.c
- create mode 100644 gcc/testsuite/gcc.dg/icp5.c
- create mode 100644 gcc/testsuite/gcc.dg/icp6.c
- create mode 100644 gcc/testsuite/gcc.dg/icp7.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 39c90604e..16aadccf6 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1316,6 +1316,14 @@ fdevirtualize
- Common Var(flag_devirtualize) Optimization
- Try to convert virtual calls to direct ones.
- 
-+ficp
-+Common Var(flag_icp) Optimization Init(0)
-+Try to promote indirect calls to direct ones.
-+
-+ficp-speculatively
-+Common Var(flag_icp_speculatively) Optimization
-+Promote indirect calls speculatively.
-+
- fdiagnostics-show-location=
- Common Joined RejectNegative Enum(diagnostic_prefixing_rule)
- -fdiagnostics-show-location=once|every-line	How often to emit source location at the beginning of line-wrapped diagnostics.
-diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
-index 3aa18cd0c..a00bbc31b 100644
---- a/gcc/dbgcnt.def
-+++ b/gcc/dbgcnt.def
-@@ -170,6 +170,7 @@ DEBUG_COUNTER (graphite_scop)
- DEBUG_COUNTER (hoist)
- DEBUG_COUNTER (hoist_insn)
- DEBUG_COUNTER (ia64_sched2)
-+DEBUG_COUNTER (icp)
- DEBUG_COUNTER (if_after_combine)
- DEBUG_COUNTER (if_after_reload)
- DEBUG_COUNTER (if_conversion)
-diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
-index 74fe65608..383839189 100644
---- a/gcc/ipa-devirt.cc
-+++ b/gcc/ipa-devirt.cc
-@@ -103,9 +103,14 @@ along with GCC; see the file COPYING3.  If not see
-   indirect polymorphic edge all possible polymorphic call targets of the call.
- 
-   pass_ipa_devirt performs simple speculative devirtualization.
-+  pass_ipa_icp performs simple indirect call promotion.
- */
- 
- #include "config.h"
-+#define INCLUDE_ALGORITHM
-+#define INCLUDE_SET
-+#define INCLUDE_MAP
-+#define INCLUDE_LIST
- #include "system.h"
- #include "coretypes.h"
- #include "backend.h"
-@@ -127,6 +132,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "ipa-fnsummary.h"
- #include "demangle.h"
- #include "dbgcnt.h"
-+#include "gimple-iterator.h"
- #include "gimple-pretty-print.h"
- #include "intl.h"
- #include "stringpool.h"
-@@ -4401,5 +4407,1854 @@ make_pass_ipa_odr (gcc::context *ctxt)
-   return new pass_ipa_odr (ctxt);
- }
- 
-+/* Function signature map used to look up function decl which corresponds to
-+   the given function type.  */
-+typedef std::set<unsigned> type_set;
-+typedef std::set<tree> decl_set;
-+typedef std::map<unsigned, type_set*> type_alias_map;
-+typedef std::map<unsigned, decl_set*> type_decl_map;
-+typedef std::map<unsigned, tree> uid_to_type_map;
-+typedef std::map<tree, tree> type_map;
-+
-+static bool has_address_taken_functions_with_varargs = false;
-+static type_set *unsafe_types = NULL;
-+static type_alias_map *fta_map = NULL;
-+static type_alias_map *ta_map = NULL;
-+static type_map *ctype_map = NULL;
-+static type_alias_map *cbase_to_ptype = NULL;
-+static type_decl_map *fs_map = NULL;
-+static uid_to_type_map *type_uid_map = NULL;
-+
-+static void
-+print_type_set(unsigned ftype_uid, type_alias_map *map)
-+{
-+  if (!map->count (ftype_uid))
-+    return;
-+  type_set* s = (*map)[ftype_uid];
-+  for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
-+    fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
-+}
-+
-+static void
-+dump_type_with_uid (const char *msg, tree type, dump_flags_t flags = TDF_NONE)
-+{
-+  fprintf (dump_file, msg);
-+  print_generic_expr (dump_file, type, flags);
-+  fprintf (dump_file, " (%d)\n", TYPE_UID (type));
-+}
-+
-+/* Walk aggregate type and collect types of scalar elements.  */
-+
-+static void
-+collect_scalar_types (tree tp, std::list<tree> &types)
-+{
-+  /* TODO: take into account different field offsets.
-+     Also support array casts.  */
-+  if (tp && dump_file && (dump_flags & TDF_DETAILS))
-+    dump_type_with_uid ("Walk var's type: ", tp, TDF_UID);
-+  if (RECORD_OR_UNION_TYPE_P (tp))
-+    {
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	fprintf (dump_file, "Record's fields {\n");
-+      for (tree field = TYPE_FIELDS (tp); field;
-+	   field = DECL_CHAIN (field))
-+	{
-+	  if (TREE_CODE (field) != FIELD_DECL)
-+	    continue;
-+	  collect_scalar_types (TREE_TYPE (field), types);
-+	}
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	fprintf (dump_file, "}\n");
-+      return;
-+    }
-+  if (TREE_CODE (tp) == ARRAY_TYPE)
-+    {
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	fprintf (dump_file, "Array's innermost type:\n");
-+      /* Take the innermost component type.  */
-+      tree elt;
-+      for (elt = TREE_TYPE (tp); TREE_CODE (elt) == ARRAY_TYPE;
-+	   elt = TREE_TYPE (elt))
-+	if (dump_file && (dump_flags & TDF_DETAILS))
-+	  print_generic_expr (dump_file, elt);
-+      collect_scalar_types (elt, types);
-+      return;
-+    }
-+  types.push_back (tp);
-+}
-+
-+static void maybe_register_aliases (tree type1, tree type2);
-+
-+/* Walk type lists and maybe register type aliases.  */
-+
-+static void
-+compare_type_lists (std::list<tree> tlist1, std::list<tree> tlist2)
-+{
-+  for (std::list<tree>::iterator ti1 = tlist1.begin (), ti2 = tlist2.begin ();
-+       ti1 != tlist1.end (); ++ti1, ++ti2)
-+    {
-+      /* TODO: correct the analysis results if lists have different length.  */
-+      if (ti2 == tlist2.end ())
-+	{
-+	  if (dump_file && (dump_flags & TDF_DETAILS))
-+	    fprintf (dump_file, "Type lists with different length!\n");
-+	  break;
-+	}
-+      maybe_register_aliases (*ti1, *ti2);
-+    }
-+}
-+
-+/* For two given types collect scalar element types and
-+   compare the result lists to find type aliases.  */
-+
-+static void
-+collect_scalar_types_and_find_aliases (tree t1, tree t2)
-+{
-+  std::list<tree> tlist1;
-+  std::list<tree> tlist2;
-+  if (dump_file && (dump_flags & TDF_DETAILS))
-+    fprintf (dump_file, "First type list: ");
-+  collect_scalar_types (t1, tlist1);
-+  if (dump_file && (dump_flags & TDF_DETAILS))

_service:tar_scm:0041-Port-fixes-in-icp-to-GCC-12.patch Deleted

@@ -1,100 +0,0 @@
-From aaa117a9ff58fb208e8c8859e075ca425f995f63 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Tue, 27 Feb 2024 07:43:57 +0800
-Subject: [PATCH 07/18] Port fixes in icp to GCC 12
-
----
- gcc/ipa-devirt.cc | 37 ++++++++++++++++++++++++++++++-------
- 1 file changed, 30 insertions(+), 7 deletions(-)
-
-diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
-index 383839189..318535d06 100644
---- a/gcc/ipa-devirt.cc
-+++ b/gcc/ipa-devirt.cc
-@@ -4431,6 +4431,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
-   if (!map->count (ftype_uid))
-     return;
-   type_set* s = (*map)[ftype_uid];
-+  if (!s)
-+    {
-+      fprintf (dump_file, "%d (no set)", ftype_uid);
-+      return;
-+    }
-   for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
-     fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
- }
-@@ -4696,12 +4701,19 @@ maybe_register_aliases (tree type1, tree type2)
-       if (register_ailas_type (type1, type2, ta_map))
- 	analyze_pointees (type1, type2);
-     }
-+  unsigned type1_uid = TYPE_UID (type1);
-+  unsigned type2_uid = TYPE_UID (type2);
-+  if (type_uid_map->count (type1_uid) == 0)
-+    (*type_uid_map)[type1_uid] = type1;
-+  if (type_uid_map->count (type2_uid) == 0)
-+    (*type_uid_map)[type2_uid] = type2;
-+
-   /* If function and non-function type pointers alias,
-      the function type is unsafe.  */
-   if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
--    unsafe_types->insert (TYPE_UID (type1));
-+    unsafe_types->insert (type1_uid);
-   if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
--    unsafe_types->insert (TYPE_UID (type2));
-+    unsafe_types->insert (type2_uid);
- 
-   /* Try to figure out with pointers to incomplete types.  */
-   if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
-@@ -4825,10 +4837,12 @@ compare_block_and_init_type (tree block, tree t1)
- static void
- analyze_global_var (varpool_node *var)
- {
--  var->get_constructor();
-   tree decl = var->decl;
--  if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
--      || integer_zerop (DECL_INITIAL (decl)))
-+  if (decl || !DECL_INITIAL (decl))
-+    return;
-+  var->get_constructor ();
-+  if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
-+      || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
-     return;
- 
-   if (dump_file && (dump_flags & TDF_DETAILS))
-@@ -4998,7 +5012,9 @@ analyze_assign_stmt (gimple *stmt)
-     {
-       rhs = TREE_OPERAND (rhs, 0);
-       if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
--	  || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
-+	  || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
-+	  || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
-+	  || TREE_CODE (rhs) == RESULT_DECL)
- 	rhs_type = build_pointer_type (TREE_TYPE (rhs));
-       else if (TREE_CODE (rhs) == COMPONENT_REF)
- 	{
-@@ -5012,7 +5028,12 @@ analyze_assign_stmt (gimple *stmt)
- 	  gcc_assert (POINTER_TYPE_P (rhs_type));
- 	}
-       else
--	gcc_unreachable();
-+	{
-+	  fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
-+		   get_tree_code_name (TREE_CODE (rhs)));
-+	  print_gimple_stmt (dump_file, stmt, 0);
-+	  gcc_unreachable ();
-+	}
-     }
-   else
-     rhs_type = TREE_TYPE (rhs);
-@@ -5710,6 +5731,8 @@ merge_fs_map_for_ftype_aliases ()
-       decl_set *d_set = it1->second;
-       tree type = (*type_uid_map)[it1->first];
-       type_set *set = (*fta_map)[it1->first];
-+      if (!set)
-+	continue;
-       for (type_set::const_iterator it2 = set->begin ();
- 	   it2 != set->end (); it2++)
- 	{
--- 
-2.33.0
-

_service:tar_scm:0042-Add-split-complex-instructions-pass.patch Deleted

@@ -1,1245 +0,0 @@
-From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
-From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
-Date: Mon, 21 Aug 2023 12:35:19 +0300
-Subject: [PATCH 09/18] Add split-complex-instructions pass
-
- - Add option -fsplit-ldp-stp
- - Add functionality to detect and split depended from store LDP instructions.
- - Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
- - Add RTL tests
-
-Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
-Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
-Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
----
- gcc/common.opt                                |   5 +
- gcc/config/aarch64/aarch64.cc                 |  42 ++
- gcc/doc/tm.texi                               |   8 +
- gcc/doc/tm.texi.in                            |   4 +
- gcc/params.opt                                |   3 +
- gcc/passes.def                                |   1 +
- gcc/sched-rgn.cc                              | 704 +++++++++++++++++-
- gcc/target.def                                |  10 +
- .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c  |  74 ++
- .../rtl/aarch64/test-ldp-split-rearrange.c    |  40 +
- .../gcc.dg/rtl/aarch64/test-ldp-split.c       | 174 +++++
- gcc/timevar.def                               |   1 +
- gcc/tree-pass.h                               |   1 +
- 13 files changed, 1066 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
- create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
- create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index a42bee250..c0e3f5687 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1797,6 +1797,11 @@ floop-nest-optimize
- Common Var(flag_loop_nest_optimize) Optimization
- Enable the loop nest optimizer.
- 
-+fsplit-ldp-stp
-+Common Var(flag_split_ldp_stp) Optimization
-+Split load/store pair instructions into separate load/store operations
-+for better performance.
-+
- fstrict-volatile-bitfields
- Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
- Force bitfield accesses to match their type width.
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 04072ca25..48e2eded0 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
- 
- #endif /* #if CHECKING_P */
- 
-+/* TODO: refuse to use ranges intead of full list of an instruction codes.  */
-+
-+bool
-+is_aarch64_ldp_insn (int icode)
-+{
-+  if ((icode >= CODE_FOR_load_pair_sw_sisi
-+	  && icode <= CODE_FOR_load_pair_dw_tftf)
-+      || (icode >= CODE_FOR_loadwb_pairsi_si
-+	     && icode <= CODE_FOR_loadwb_pairtf_di)
-+      || (icode >= CODE_FOR_load_pairv8qiv8qi
-+	     && icode <= CODE_FOR_load_pairdfdf)
-+      || (icode >= CODE_FOR_load_pairv16qiv16qi
-+	     && icode <= CODE_FOR_load_pairv8bfv2df)
-+      || (icode >= CODE_FOR_load_pair_lanesv8qi
-+	     && icode <= CODE_FOR_load_pair_lanesdf))
-+    return true;
-+  return false;
-+}
-+
-+bool
-+is_aarch64_stp_insn (int icode)
-+{
-+  if ((icode >= CODE_FOR_store_pair_sw_sisi
-+	  && icode <= CODE_FOR_store_pair_dw_tftf)
-+      || (icode >= CODE_FOR_storewb_pairsi_si
-+	     && icode <= CODE_FOR_storewb_pairtf_di)
-+      || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
-+	     && icode <= CODE_FOR_vec_store_pairdfdf)
-+      || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
-+	     && icode <= CODE_FOR_vec_store_pairv8bfv2df)
-+      || (icode >= CODE_FOR_store_pair_lanesv8qi
-+	     && icode <= CODE_FOR_store_pair_lanesdf))
-+    return true;
-+  return false;
-+}
-+
-+#undef TARGET_IS_LDP_INSN
-+#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
-+
-+#undef TARGET_IS_STP_INSN
-+#define TARGET_IS_STP_INSN is_aarch64_stp_insn
-+
- #undef TARGET_STACK_PROTECT_GUARD
- #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
- 
-diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
-index c5006afc0..0c6415a9c 100644
---- a/gcc/doc/tm.texi
-+++ b/gcc/doc/tm.texi
-@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
- lists.
- @end defmac
- 
-+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
-+Return true if icode is corresponding to any of the LDP instruction types.
-+@end deftypefn
-+
-+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
-+Return true if icode is corresponding to any of the STP instruction types.
-+@end deftypefn
-+
- @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
- This target hook returns @code{true} past the point in which new jump
- instructions could be created.  On machines that require a register for
-diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
-index f869ddd5e..6ff60e562 100644
---- a/gcc/doc/tm.texi.in
-+++ b/gcc/doc/tm.texi.in
-@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
- lists.
- @end defmac
- 
-+@hook TARGET_IS_LDP_INSN
-+
-+@hook TARGET_IS_STP_INSN
-+
- @hook TARGET_CANNOT_MODIFY_JUMPS_P
- 
- @hook TARGET_HAVE_CONDITIONAL_EXECUTION
-diff --git a/gcc/params.opt b/gcc/params.opt
-index 7fcc2398d..6176d4790 100644
---- a/gcc/params.opt
-+++ b/gcc/params.opt
-@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
- Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
- Target size of compressed pointer, which should be 8, 16 or 32.
- 
-+-param=param-ldp-dependency-search-range=
-+Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
-+Range for depended ldp search in split-ldp-stp path.
- ; This comment is to ensure we retain the blank line above.
-diff --git a/gcc/passes.def b/gcc/passes.def
-index 941bbadf0..a30e05688 100644
---- a/gcc/passes.def
-+++ b/gcc/passes.def
-@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3.  If not see
- 	  NEXT_PASS (pass_reorder_blocks);
- 	  NEXT_PASS (pass_leaf_regs);
- 	  NEXT_PASS (pass_split_before_sched2);
-+	  NEXT_PASS (pass_split_complex_instructions);
- 	  NEXT_PASS (pass_sched2);
- 	  NEXT_PASS (pass_stack_regs);
- 	  PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
-diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
-index a0dfdb788..b4df8bdc5 100644
---- a/gcc/sched-rgn.cc
-+++ b/gcc/sched-rgn.cc
-@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3.  If not see
-    are actually scheduled.  */
- &#xc;
- #include "config.h"
-+#define INCLUDE_SET
-+#define INCLUDE_VECTOR
- #include "system.h"
- #include "coretypes.h"
- #include "backend.h"
-@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "dbgcnt.h"
- #include "pretty-print.h"
- #include "print-rtl.h"
-+#include "cfgrtl.h"
- 
- /* Disable warnings about quoting issues in the pp_xxx calls below
-    that (intentionally) don't follow GCC diagnostic conventions.  */
-@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
-   return new pass_sched_fusion (ctxt);
- }
- 
-+namespace {
-+
-+/* Def-use analisys special functions implementation.  */
-+
-+static struct df_link *
-+get_defs (rtx_insn *insn, rtx reg)
-+{
-+  df_ref use;
-+  struct df_link *ref_chain, *ref_link;
-+
-+  FOR_EACH_INSN_USE (use, insn)
-+    {
-+      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
-+	return NULL;
-+      if (REGNO (DF_REF_REG (use)) == REGNO (reg))

_service:tar_scm:0043-Extending-and-refactoring-of-pass_split_complex_inst.patch Deleted

@@ -1,1426 +0,0 @@
-From a49db831320ac70ca8f46b94ee60d7c6951f65c3 Mon Sep 17 00:00:00 2001
-From: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
-Date: Wed, 20 Dec 2023 21:36:07 +0300
-Subject: [PATCH 10/18] Extending and refactoring of
- pass_split_complex_instructions
-
-- Add flag parameter in is_ldp_insn and is_stp_insn to know
-  if instruction has writeback operation
-- Add support of PRE_*, POST_* operands as a memory address
-  expression
-- Split only LDPs that intersect with a dependent store
-  instruction
-- Make the selection of dependent store instructions stricter
-  so it will be enough to check by BFS that dependent store
-  instruction appears in search range.
-- Add helper methods to retrieve fields of rtx
-- Remove redundant iterations in find_dependent_stores_candidates
-- Refactor generation of instructions
-- Add more test cases
----
- gcc/config/aarch64/aarch64.cc                 |  62 +-
- gcc/doc/tm.texi                               |  12 +-
- gcc/sched-rgn.cc                              | 771 +++++++++---------
- gcc/target.def                                |  14 +-
- .../gcc.dg/rtl/aarch64/test-ldp-dont-split.c  |  35 +-
- .../rtl/aarch64/test-ldp-split-rearrange.c    |   2 +-
- .../gcc.dg/rtl/aarch64/test-ldp-split.c       | 181 +++-
- 7 files changed, 603 insertions(+), 474 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 48e2eded0..fa566dd80 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -27507,39 +27507,59 @@ aarch64_run_selftests (void)
- 
- #endif /* #if CHECKING_P */
- 
--/* TODO: refuse to use ranges intead of full list of an instruction codes.  */
-+/* TODO: refuse to use ranges instead of full list of an instruction codes.  */
- 
- bool
--is_aarch64_ldp_insn (int icode)
-+is_aarch64_ldp_insn (int icode, bool *has_wb)
- {
-   if ((icode >= CODE_FOR_load_pair_sw_sisi
--	  && icode <= CODE_FOR_load_pair_dw_tftf)
-+	  && icode <= CODE_FOR_load_pair_sw_sfsf)
-+      || (icode >= CODE_FOR_load_pair_dw_didi
-+	  && icode <= CODE_FOR_load_pair_dw_dfdf)
-+      || (icode == CODE_FOR_load_pair_dw_tftf)
-       || (icode >= CODE_FOR_loadwb_pairsi_si
--	     && icode <= CODE_FOR_loadwb_pairtf_di)
--      || (icode >= CODE_FOR_load_pairv8qiv8qi
--	     && icode <= CODE_FOR_load_pairdfdf)
--      || (icode >= CODE_FOR_load_pairv16qiv16qi
--	     && icode <= CODE_FOR_load_pairv8bfv2df)
--      || (icode >= CODE_FOR_load_pair_lanesv8qi
--	     && icode <= CODE_FOR_load_pair_lanesdf))
--    return true;
-+	  && icode <= CODE_FOR_loadwb_pairdi_di)
-+      || (icode >= CODE_FOR_loadwb_pairsf_si
-+	  && icode <= CODE_FOR_loadwb_pairdf_di)
-+      || (icode >= CODE_FOR_loadwb_pairti_si
-+	  && icode <= CODE_FOR_loadwb_pairtf_di))
-+    {
-+      if (has_wb)
-+	*has_wb = ((icode >= CODE_FOR_loadwb_pairsi_si
-+		     && icode <= CODE_FOR_loadwb_pairdi_di)
-+		   || (icode >= CODE_FOR_loadwb_pairsf_si
-+		     && icode <= CODE_FOR_loadwb_pairdf_di)
-+		   || (icode >= CODE_FOR_loadwb_pairti_si
-+		      && icode <= CODE_FOR_loadwb_pairtf_di));
-+      return true;
-+    }
-   return false;
- }
- 
- bool
--is_aarch64_stp_insn (int icode)
-+is_aarch64_stp_insn (int icode, bool *has_wb)
- {
-   if ((icode >= CODE_FOR_store_pair_sw_sisi
--	  && icode <= CODE_FOR_store_pair_dw_tftf)
-+	  && icode <= CODE_FOR_store_pair_sw_sfsf)
-+      || (icode >= CODE_FOR_store_pair_dw_didi
-+	  && icode <= CODE_FOR_store_pair_dw_dfdf)
-+      || (icode == CODE_FOR_store_pair_dw_tftf)
-       || (icode >= CODE_FOR_storewb_pairsi_si
--	     && icode <= CODE_FOR_storewb_pairtf_di)
--      || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
--	     && icode <= CODE_FOR_vec_store_pairdfdf)
--      || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
--	     && icode <= CODE_FOR_vec_store_pairv8bfv2df)
--      || (icode >= CODE_FOR_store_pair_lanesv8qi
--	     && icode <= CODE_FOR_store_pair_lanesdf))
--    return true;
-+	  && icode <= CODE_FOR_storewb_pairdi_di)
-+      || (icode >= CODE_FOR_storewb_pairsf_si
-+	  && icode <= CODE_FOR_storewb_pairdf_di)
-+      || (icode >= CODE_FOR_storewb_pairti_si
-+	  && icode <= CODE_FOR_storewb_pairtf_di))
-+    {
-+      if (has_wb)
-+	*has_wb = ((icode >= CODE_FOR_storewb_pairsi_si
-+		     && icode <= CODE_FOR_storewb_pairdi_di)
-+		   || (icode >= CODE_FOR_storewb_pairsf_si
-+		     && icode <= CODE_FOR_storewb_pairdf_di)
-+		   || (icode >= CODE_FOR_storewb_pairti_si
-+		     && icode <= CODE_FOR_storewb_pairtf_di));
-+      return true;
-+    }
-   return false;
- }
- 
-diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
-index 0c6415a9c..3b6e90bf2 100644
---- a/gcc/doc/tm.texi
-+++ b/gcc/doc/tm.texi
-@@ -12113,12 +12113,16 @@ object files that are not referenced from @code{main} and uses export
- lists.
- @end defmac
- 
--@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
--Return true if icode is corresponding to any of the LDP instruction types.
-+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}, bool *@var{has_wb})
-+Return true if @var{icode} is corresponding to any of the LDP instruction
-+types.  If @var{has_wb} is not NULL then its value is set to true if LDP
-+contains post-index or pre-index operation.
- @end deftypefn
- 
--@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
--Return true if icode is corresponding to any of the STP instruction types.
-+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}, bool *@var{has_wb})
-+Return true if @var{icode} is corresponding to any of the STP instruction
-+types.  If @var{has_wb} is not NULL then its value is set to true if STP
-+contains post-index or pre-index operation.
- @end deftypefn
- 
- @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
-diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
-index b4df8bdc5..5f61de1c8 100644
---- a/gcc/sched-rgn.cc
-+++ b/gcc/sched-rgn.cc
-@@ -3956,7 +3956,7 @@ make_pass_sched_fusion (gcc::context *ctxt)
- 
- namespace {
- 
--/* Def-use analisys special functions implementation.  */
-+/* Def-use analysis special functions implementation.  */
- 
- static struct df_link *
- get_defs (rtx_insn *insn, rtx reg)
-@@ -4032,42 +4032,66 @@ const pass_data pass_data_split_complex_instructions = {
-   (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish.  */
- };
- 
-+/* Pass split_complex_instructions finds LOAD PAIR instructions (LDP) that can
-+   be split into two LDR instructions.  It splits only those LDP for which one
-+   half of the requested memory is contained in the preceding STORE (STR/STP)
-+   instruction whose base register has the same definition.  This allows
-+   to use hardware store-to-load forwarding mechanism and to get one half of
-+   requested memory from the store queue of CPU.
-+
-+   TODO: Add split of STP.
-+   TODO: Add split of vector STP and LDP.  */
- class pass_split_complex_instructions : public rtl_opt_pass
- {
- private:
--  enum complex_instructions_t
-+  enum mem_access_insn_t
-   {
-     UNDEFINED,
-     LDP,
-+    /* LDP with post-index (see loadwb_pair in config/aarch64.md).  */
-+    LDP_WB,
-+    /* LDP that contains one destination register in RTL IR
-+       (see movti_aarch64 in config/aarch64.md).  */
-     LDP_TI,
-     STP,
-+    /* STP with pre-index (see storewb_pair in config/aarch64.md).  */
-+    STP_WB,
-+    /* STP that contains one source register in RTL IR
-+       (see movti_aarch64 in config/aarch64.md).  */
-+    STP_TI,
-     STR
-   };
- 
--  void split_complex_insn (rtx_insn *insn);
--  void split_ldp_ti (rtx_insn *insn);
--  void split_ldp_with_offset (rtx_insn *ldp_insn);
--  void split_simple_ldp (rtx_insn *ldp_insn);
--  void split_ldp_stp (rtx_insn *insn);
--  complex_instructions_t get_insn_type (rtx_insn *insn);
--
--  basic_block bb;
--  rtx_insn *insn;
-   std::set<rtx_insn *> dependent_stores_candidates;
-   std::set<rtx_insn *> ldp_to_split_list;
-

_service:tar_scm:0044-Port-maxmin-patch-to-GCC-12.patch Deleted

@@ -1,378 +0,0 @@
-From a3013c074cd2ab5f71eb98a587a627f38c68656c Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Thu, 22 Feb 2024 17:07:24 +0800
-Subject: [PATCH 12/18] Port maxmin patch to GCC 12
-
----
- gcc/config/aarch64/aarch64-simd.md    | 256 ++++++++++++++++++++++++++
- gcc/config/aarch64/predicates.md      |  19 ++
- gcc/testsuite/gcc.dg/combine-maxmin.c |  46 +++++
- 3 files changed, 321 insertions(+)
- create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
-
-diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
-index 82f73805f..de92802f5 100644
---- a/gcc/config/aarch64/aarch64-simd.md
-+++ b/gcc/config/aarch64/aarch64-simd.md
-@@ -1138,6 +1138,82 @@
-   (set_attr "type" "neon_compare<q>,neon_shift_imm<q>")
- )
- 
-+;; Simplify the extension with following truncation for shift+neg operation.
-+
-+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
-+  (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(vec_concat:V8HI
-+	  (truncate:V4HI
-+	    (ashiftrt:V4SI
-+	      (neg:V4SI
-+		(sign_extend:V4SI
-+		  (vec_select:V4HI
-+		    (match_operand:V8HI 1 "register_operand")
-+		    (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
-+	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
-+	  (truncate:V4HI
-+	    (ashiftrt:V4SI
-+	      (neg:V4SI
-+		(sign_extend:V4SI
-+		  (vec_select:V4HI
-+		    (match_dup 1)
-+		    (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
-+	      (match_dup 2)))))
-+  "TARGET_SIMD"
-+  "#"
-+  "&& true"
-+  (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(ashiftrt:V8HI
-+	  (neg:V8HI
-+	    (match_operand:V8HI 1 "register_operand" "w"))
-+	  (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))
-+  {
-+    /* Reduce the shift amount to smaller mode.  */
-+    int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
-+	      - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
-+    operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
-+  }
-+  (set_attr "type" "multiple")
-+)
-+
-+;; The helper definition that allows combiner to use the previous pattern.
-+
-+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
-+  (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(vec_concat:V8HI
-+	  (truncate:V4HI
-+	    (ashiftrt:V4SI
-+	      (neg:V4SI
-+		(match_operand:V4SI 1 "register_operand" "w"))
-+	      (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
-+	  (truncate:V4HI
-+	    (ashiftrt:V4SI
-+	      (neg:V4SI
-+		(match_operand:V4SI 3 "register_operand" "w"))
-+	      (match_dup 2)))))
-+  "TARGET_SIMD"
-+  "#"
-+  "&& true"
-+  (set (match_operand:V4SI 1 "register_operand" "=w")
-+	(ashiftrt:V4SI
-+	  (neg:V4SI
-+	    (match_dup 1))
-+	  (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
-+   (set (match_operand:V4SI 3 "register_operand" "=w")
-+	(ashiftrt:V4SI
-+	  (neg:V4SI
-+	    (match_dup 3))
-+	  (match_dup 2)))
-+   (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(vec_concat:V8HI
-+	  (truncate:V4HI
-+	    (match_dup 1))
-+	  (truncate:V4HI
-+	    (match_dup 3))))
-+  ""
-+  (set_attr "type" "multiple")
-+)
-+
- (define_insn "*aarch64_simd_sra<mode>"
-  (set (match_operand:VDQ_I 0 "register_operand" "=w")
- 	(plus:VDQ_I
-@@ -1714,6 +1790,26 @@
-  }
- )
- 
-+(define_insn "vec_pack_trunc_shifted_<mode>"
-+ (set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
-+       (vec_concat:<VNARROWQ2>
-+	 (truncate:<VNARROWQ>
-+	   (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
-+	      (match_operand:VQN 2 "half_size_operand" "w")))
-+	 (truncate:<VNARROWQ>
-+	   (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
-+	      (match_operand:VQN 4 "half_size_operand" "w")))))
-+ "TARGET_SIMD"
-+ {
-+   if (BYTES_BIG_ENDIAN)
-+     return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
-+   else
-+     return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
-+ }
-+  (set_attr "type" "neon_permute<q>")
-+   (set_attr "length" "4")
-+)
-+
- (define_insn "aarch64_shrn<mode>_insn_le"
-   (set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- 	(vec_concat:<VNARROWQ2>
-@@ -6652,6 +6748,166 @@
-   (set_attr "type" "neon_tst<q>")
- )
- 
-+;; Simplify the extension with following truncation for cmtst-like operation.
-+
-+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
-+  (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(vec_concat:V8HI
-+	  (plus:V4HI
-+	    (truncate:V4HI
-+	      (eq:V4SI
-+		(sign_extend:V4SI
-+		  (vec_select:V4HI
-+		    (and:V8HI
-+		      (match_operand:V8HI 1 "register_operand")
-+		      (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
-+		    (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
-+		(match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
-+	    (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
-+	  (plus:V4HI
-+	    (truncate:V4HI
-+	      (eq:V4SI
-+		(sign_extend:V4SI
-+		  (vec_select:V4HI
-+		    (and:V8HI
-+		      (match_dup 1)
-+		      (match_dup 2))
-+		    (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
-+		(match_dup 4)))
-+	    (match_dup 5))))
-+  "TARGET_SIMD && !reload_completed"
-+  "#"
-+  "&& true"
-+  (set (match_operand:V8HI 6 "register_operand" "=w")
-+	(match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
-+   (set (match_operand:V8HI 0 "register_operand" "=w")
-+	(plus:V8HI
-+	  (eq:V8HI
-+	    (and:V8HI
-+	      (match_operand:V8HI 1 "register_operand" "w")
-+	      (match_dup 6))
-+	    (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
-+	  (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))
-+  {
-+    if (can_create_pseudo_p ())
-+      {
-+	int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
-+	operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
-+	int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
-+	operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
-+
-+	operands[6] = gen_reg_rtx (V8HImode);
-+      }
-+    else
-+      FAIL;
-+  }
-+  (set_attr "type" "neon_tst_q")
-+)
-+
-+;; Three helper definitions that allow combiner to use the previous pattern.
-+
-+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
-+  (set (match_operand:V4SI 0 "register_operand" "=w")
-+	(neg:V4SI
-+	  (eq:V4SI
-+	    (sign_extend:V4SI
-+	      (vec_select:V4HI
-+		(and:V8HI
-+		  (match_operand:V8HI 1 "register_operand")
-+		  (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
-+		(match_operand:V8HI 3 "vect_par_cnst_lo_half")))
-+	    (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))

_service:tar_scm:0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch Deleted

@@ -1,239 +0,0 @@
-From 11da40d18e35219961226d40f11b0702b8649044 Mon Sep 17 00:00:00 2001
-From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
-Date: Thu, 22 Feb 2024 17:13:27 +0800
-Subject: [PATCH 13/18] Port moving minmask pattern to gimple to GCC 12
-
----
- gcc/common.opt                          |   4 +
- gcc/match.pd                            | 104 ++++++++++++++++++++++++
- gcc/testsuite/gcc.dg/combine-maxmin-1.c |  15 ++++
- gcc/testsuite/gcc.dg/combine-maxmin-2.c |  14 ++++
- gcc/testsuite/gcc.dg/combine-maxmin.c   |  19 +++--
- 5 files changed, 151 insertions(+), 5 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
- create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 6c6fabb31..3a5004271 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1846,6 +1846,10 @@ fif-conversion-gimple
- Common Var(flag_if_conversion_gimple) Optimization
- Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
- 
-+fconvert-minmax
-+Common Var(flag_convert_minmax) Optimization
-+Convert saturating clipping to min max.
-+
- fstack-reuse=
- Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
- -fstack-reuse=all|named_vars|none	Set stack reuse level for local variables.
-diff --git a/gcc/match.pd b/gcc/match.pd
-index 61866cb90..3a19e93b3 100644
---- a/gcc/match.pd
-+++ b/gcc/match.pd
-@@ -8031,3 +8031,107 @@ and,
-    (plus:c@4 (op2:c @0 @1)
-     (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
-      (if (single_use (@4) && single_use (@5)))))
-+
-+/* MinMax pattern matching helpers.  More info on the transformation below.  */
-+
-+/* Match (a & 0b11..100..0) pattern.  */
-+(match (minmax_cmp_arg @0 @1)
-+ (bit_and @0 INTEGER_CST@1)
-+ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
-+
-+/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
-+   This statement is blocking for the transformation of unsigned integers.
-+   Do type check here to avoid unnecessary duplications.  */
-+(match (minmax_sat_arg @0)
-+ (rshift (negate @0) INTEGER_CST@1)
-+ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
-+      && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
-+
-+/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
-+   The matched pattern can be described as saturated clipping.
-+
-+   The pattern supports truncation via both casts and bit_and.
-+   Also there are patterns for possible inverted conditions.  */
-+(if (flag_convert_minmax)
-+/* Truncation via casts.  Unfortunately convert? cannot be applied here
-+   because convert and cond take different number of arguments.  */
-+ (simplify
-+  (convert
-+   (cond
-+    (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+    (convert? (minmax_sat_arg @0))
-+    (convert? @0)))
-+  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+ (simplify
-+  (cond
-+   (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+   (convert? (minmax_sat_arg @0))
-+   (convert? @0))
-+  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+
-+ (simplify
-+  (convert
-+   (cond
-+    (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+    (convert? @0)
-+    (convert? (minmax_sat_arg @0))))
-+  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+ (simplify
-+  (cond
-+   (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+   (convert? @0)
-+   (convert? (minmax_sat_arg @0)))
-+  (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+
-+ /* Truncation via bit_and with mask.  Same concerns on convert? here.  */
-+ (simplify
-+  (convert
-+   (cond
-+    (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+    (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
-+    (convert? @0)))
-+  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+ (simplify
-+  (cond
-+   (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+   (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
-+   (convert? @0))
-+  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+
-+ (simplify
-+  (convert
-+   (cond
-+    (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+    (convert? @0)
-+    (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
-+  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; })))))
-+ (simplify
-+  (cond
-+   (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
-+   (convert? @0)
-+   (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
-+  (if (wi::to_widest (@2) == ~wi::to_widest (@1))
-+   (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
-+    (convert (min (max @0 { integer_zero_node; })
-+		  { mask; }))))))
-diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
-new file mode 100644
-index 000000000..859ff7df8
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
-@@ -0,0 +1,15 @@
-+/* { dg-do compile { target aarch64-*-* } } */
-+/* { dg-options "-O3 -fconvert-minmax" } */
-+
-+#include <inttypes.h>
-+
-+__attribute__((noinline))
-+void test (int32_t *restrict a, int32_t *restrict x)
-+{
-+  for (int i = 0; i < 4; i++)
-+    a[i] = ((((-x[i]) >> 31) ^ x[i])
-+            & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
-+}
-+
-+/* { dg-final { scan-assembler-not {smax\t} } }  */
-+/* { dg-final { scan-assembler-not {smin\t} } }  */
-diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
-new file mode 100644
-index 000000000..63d4d85b3
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile { target aarch64-*-* } } */
-+/* { dg-options "-O3 -fconvert-minmax" } */
-+
-+#include <inttypes.h>
-+
-+__attribute__((noinline))
-+void test (int8_t *restrict a, int32_t *restrict x)
-+{
-+  for (int i = 0; i < 8; i++)
-+    a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
-+}
-+
-+/* { dg-final { scan-assembler-times {smax\t} 4 } }  */
-+/* { dg-final { scan-assembler-times {smin\t} 4 } }  */
-diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
-index 06bce7029..a984fa560 100755
---- a/gcc/testsuite/gcc.dg/combine-maxmin.c
-+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
-@@ -1,5 +1,5 @@
- /* { dg-do compile { target aarch64-*-* } } */
--/* { dg-options "-O3 -fdump-rtl-combine-all" } */
-+/* { dg-options "-O3 -fconvert-minmax" } */
- 
- /* The test checks usage of smax/smin insns for clip evaluation and
-  * uzp1/uzp2 insns for vector element narrowing.  It's inspired by
-@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
- {
-     const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
-     for( int y = 0; y < height; y++ ) {
-+        /* This loop is not being vectorized now.  */

_service:tar_scm:0046-Add-new-pattern-to-pass-the-maxmin-tests.patch Deleted

@@ -1,65 +0,0 @@
-From dbcb2630c426c8dd2117b5ce625da8422dd8cd65 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Thu, 22 Feb 2024 17:20:17 +0800
-Subject: [PATCH 14/18] Add new pattern to pass the maxmin tests
-
----
- gcc/match.pd                          | 24 ++++++++++++++++++++++++
- gcc/testsuite/gcc.dg/combine-maxmin.c |  2 +-
- 2 files changed, 25 insertions(+), 1 deletion(-)
-
-diff --git a/gcc/match.pd b/gcc/match.pd
-index 3a19e93b3..aee58e47b 100644
---- a/gcc/match.pd
-+++ b/gcc/match.pd
-@@ -8038,6 +8038,10 @@ and,
- (match (minmax_cmp_arg @0 @1)
-  (bit_and @0 INTEGER_CST@1)
-  (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
-+/* Match ((unsigned) a > 0b0..01..1) pattern.  */
-+(match (minmax_cmp_arg1 @0 @1)
-+ (gt @0 INTEGER_CST@1)
-+ (if (wi::popcount (wi::to_widest (@1) + 1) == 1)))
- 
- /* Match (inversed_sign_bit >> sign_bit_pos) pattern.
-    This statement is blocking for the transformation of unsigned integers.
-@@ -8095,6 +8099,26 @@ and,
-     (convert (min (max @0 { integer_zero_node; })
- 		  { mask; })))))
- 
-+ (simplify
-+  (convert
-+   (cond
-+    (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
-+    (convert? (minmax_sat_arg @0))
-+    (convert? @0)))
-+  (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
-+    (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
-+		  { mask; })))))
-+ (simplify
-+  (cond
-+   (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
-+   (convert? (minmax_sat_arg @0))
-+   (convert? @0))
-+  (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
-+   (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
-+    (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
-+		  { mask; })))))
-+
-  /* Truncation via bit_and with mask.  Same concerns on convert? here.  */
-  (simplify
-   (convert
-diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
-index a984fa560..5c0c9cc49 100755
---- a/gcc/testsuite/gcc.dg/combine-maxmin.c
-+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
-@@ -52,4 +52,4 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
- /* { dg-final { scan-assembler-times {usmin\t} 6 } }  */
- /* All of the vectorized patterns are expected to be matched.  */
- /* { dg-final { scan-assembler-not {cmtst\t} } }  */
--/* { dg-final { scan-assembler-times {uzp1\t} 5 } }  */
-+/* { dg-final { scan-assembler-times {uzp1\t} 2 } }  */
--- 
-2.33.0
-

_service:tar_scm:0047-AES-Implement-AES-pattern-matching.patch Deleted

@@ -1,3968 +0,0 @@
-From 53d321d2fe08f69a29527be157d4bcaaefea04ab Mon Sep 17 00:00:00 2001
-From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
-Date: Wed, 6 Dec 2023 10:46:28 +0300
-Subject: [PATCH 15/18] [AES] Implement AES pattern matching
-
----
- gcc/Makefile.in                               |    1 +
- gcc/common.opt                                |    4 +
- gcc/config/aarch64/aarch64.cc                 |   24 +
- gcc/crypto-accel.cc                           | 2415 +++++++++++++++++
- gcc/doc/tm.texi                               |   29 +
- gcc/doc/tm.texi.in                            |   12 +
- gcc/passes.def                                |    1 +
- gcc/rtl-matcher.h                             |  367 +++
- gcc/target.def                                |   41 +
- .../gcc.target/aarch64/aes-decrypt.c          |  478 ++++
- .../gcc.target/aarch64/aes-encrypt.c          |  443 +++
- gcc/timevar.def                               |    1 +
- gcc/tree-pass.h                               |    1 +
- 13 files changed, 3817 insertions(+)
- create mode 100644 gcc/crypto-accel.cc
- create mode 100644 gcc/rtl-matcher.h
- create mode 100644 gcc/testsuite/gcc.target/aarch64/aes-decrypt.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/aes-encrypt.c
-
-diff --git a/gcc/Makefile.in b/gcc/Makefile.in
-index 45705c1f3..876000bda 100644
---- a/gcc/Makefile.in
-+++ b/gcc/Makefile.in
-@@ -1332,6 +1332,7 @@ OBJS = \
- 	cgraphunit.o \
- 	cgraphclones.o \
- 	combine.o \
-+	crypto-accel.o \
- 	combine-stack-adj.o \
- 	compare-elim.o \
- 	context.o \
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 3a5004271..1eb62ada5 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1129,6 +1129,10 @@ Common Var(flag_array_widen_compare) Optimization
- Extends types for pointers to arrays to improve array comparsion performance.
- In some extreme situations this may result in unsafe behavior.
- 
-+fcrypto-accel-aes
-+Common Var(flag_crypto_accel_aes) Init(0) Optimization
-+Perform crypto acceleration AES pattern matching.
-+
- fauto-inc-dec
- Common Var(flag_auto_inc_dec) Init(1) Optimization
- Generate auto-inc/dec instructions.
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index fa566dd80..9171d9d56 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -27569,6 +27569,30 @@ is_aarch64_stp_insn (int icode, bool *has_wb)
- #undef TARGET_IS_STP_INSN
- #define TARGET_IS_STP_INSN is_aarch64_stp_insn
- 
-+machine_mode
-+aarch64_get_v16qi_mode ()
-+{
-+  return V16QImode;
-+}
-+
-+#undef TARGET_GET_V16QI_MODE
-+#define TARGET_GET_V16QI_MODE aarch64_get_v16qi_mode
-+
-+#undef TARGET_GEN_REV32V16QI
-+#define TARGET_GEN_REV32V16QI gen_aarch64_rev32v16qi
-+
-+#undef TARGET_GEN_AESEV16QI
-+#define TARGET_GEN_AESEV16QI gen_aarch64_crypto_aesev16qi
-+
-+#undef TARGET_GEN_AESDV16QI
-+#define TARGET_GEN_AESDV16QI gen_aarch64_crypto_aesdv16qi
-+
-+#undef TARGET_GEN_AESMCV16QI
-+#define TARGET_GEN_AESMCV16QI gen_aarch64_crypto_aesmcv16qi
-+
-+#undef TARGET_GEN_AESIMCV16QI
-+#define TARGET_GEN_AESIMCV16QI gen_aarch64_crypto_aesimcv16qi
-+
- #undef TARGET_STACK_PROTECT_GUARD
- #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
- 
-diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
-new file mode 100644
-index 000000000..f4e810a6b
---- /dev/null
-+++ b/gcc/crypto-accel.cc
-@@ -0,0 +1,2415 @@
-+/* Crypto-pattern optimizer.
-+   Copyright (C) 2003-2023 Free Software Foundation, Inc.
-+
-+This file is part of GCC.
-+
-+GCC is free software; you can redistribute it and/or modify it under
-+the terms of the GNU General Public License as published by the Free
-+Software Foundation; either version 3, or (at your option) any later
-+version.
-+
-+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-+WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-+for more details.
-+
-+You should have received a copy of the GNU General Public License
-+along with GCC; see the file COPYING3.  If not see
-+<http://www.gnu.org/licenses/>.  */
-+
-+#include "config.h"
-+#define INCLUDE_VECTOR
-+#define INCLUDE_MAP
-+#define INCLUDE_SET
-+#define INCLUDE_ALGORITHM
-+#include "system.h"
-+#include "coretypes.h"
-+#include "backend.h"
-+#include "target.h"
-+#include "rtl.h"
-+#include "tree.h"
-+#include "df.h"
-+#include "memmodel.h"
-+#include "optabs.h"
-+#include "regs.h"
-+#include "emit-rtl.h"
-+#include "recog.h"
-+#include "cfgrtl.h"
-+#include "cfgcleanup.h"
-+#include "expr.h"
-+#include "tree-pass.h"
-+#include "rtl-matcher.h"
-+
-+/* Basic AES table description.  */
-+struct aes_table
-+{
-+  /* Number of elements per table.  */
-+  static const unsigned int table_nelts = 256;
-+  /* Number of tables.  */
-+  static const unsigned int basic_tables_num = 4;
-+  /* Number of rounds.  */
-+  static const unsigned int rounds_num = 4;
-+  /* Common ID for wrong table.  */
-+  static const unsigned int BAD_TABLE = -1;
-+
-+  typedef const unsigned int table_type[table_nelts];
-+  typedef table_type *table_map[basic_tables_num];
-+
-+  template<typename T>
-+  static bool is_basic_table (tree ctor, const T ethalon[table_nelts])
-+    {
-+      if (TREE_CODE (ctor) != CONSTRUCTOR
-+	  ||CONSTRUCTOR_NELTS (ctor) != table_nelts)
-+	return false;
-+
-+      unsigned ix;
-+      tree val;
-+      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (ctor), ix, val)
-+	if (TREE_INT_CST_LOW (val) != ethalon[ix])
-+	  return false;
-+      return true;
-+    }
-+
-+  static unsigned check_table (tree ctor,
-+			       table_map tables)
-+    {
-+      for (unsigned i = 0; i < 4; ++i)
-+	if (is_basic_table (ctor, *tables[i]))
-+	  return i;
-+      return BAD_TABLE;
-+    }
-+};
-+
-+/* AES encryption info.  */
-+struct aes_encrypt_table : aes_table
-+{
-+  typedef enum
-+  {
-+    TE0,
-+    TE1,
-+    TE2,
-+    TE3,
-+    BAD_TABLE = aes_table::BAD_TABLE
-+  } table_entry;
-+
-+  static table_type Te0;
-+  static table_type Te1;
-+  static table_type Te2;
-+  static table_type Te3;
-+
-+  static table_map tables;
-+  static table_entry rounds[rounds_num];
-+  static table_entry final_rounds[rounds_num];
-+
-+  static table_entry get_table_id (tree ctor)
-+    {
-+      return static_cast<table_entry> (check_table (ctor, tables));

_service:tar_scm:0048-crypto-accel-add-optimization-level-requirement-to-t.patch Deleted

_service:tar_scm:0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch Deleted

@@ -1,239 +0,0 @@
-From b5865aef36ebaac87ae30d51f08bfe081795ed67 Mon Sep 17 00:00:00 2001
-From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
-Date: Tue, 12 Mar 2024 23:30:56 +0800
-Subject: [PATCH 17/18] Add more flexible check for pointer aliasing during
- vectorization It takes minimum between number of iteration and segment length
- it helps to speed up loops with small number of iterations when only tail can
- be vectorized
-
----
- gcc/params.opt                                |  5 ++
- .../sve/var_stride_flexible_segment_len_1.c   | 23 +++++++
- gcc/tree-data-ref.cc                          | 67 +++++++++++++------
- gcc/tree-data-ref.h                           | 11 ++-
- gcc/tree-vect-data-refs.cc                    | 14 +++-
- 5 files changed, 95 insertions(+), 25 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
-
-diff --git a/gcc/params.opt b/gcc/params.opt
-index 6176d4790..7e5c119cf 100644
---- a/gcc/params.opt
-+++ b/gcc/params.opt
-@@ -1180,6 +1180,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop.
- Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization
- Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check.
- 
-+-param=vect-alias-flexible-segment-len=
-+Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization
-+Use a minimum length of different segments.  Currently the minimum between
-+iteration number and vectorization length is chosen by this param.
-+
- -param=vect-max-version-for-alignment-checks=
- Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
- Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
-new file mode 100644
-index 000000000..894f075f3
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
-@@ -0,0 +1,23 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */
-+
-+#define TYPE int
-+#define SIZE 257
-+
-+void __attribute__ ((weak))
-+f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
-+{
-+  for (int i = 0; i < SIZE; ++i)
-+    x[i * n] += y[i * n];
-+}
-+
-+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
-+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
-+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
-+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
-+   an overlap check that multiplies by (257-1)*4.  */
-+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
-+/* One range check and a check for n being zero.  */
-+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */
-+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
-diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
-index 397792c35..e6ae9e847 100644
---- a/gcc/tree-data-ref.cc
-+++ b/gcc/tree-data-ref.cc
-@@ -2329,31 +2329,15 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
-    same arguments.  Try to optimize cases in which the second access
-    is a write and in which some overlap is valid.  */
- 
--static bool
--create_waw_or_war_checks (tree *cond_expr,
-+static void
-+create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a,
- 			  const dr_with_seg_len_pair_t &alias_pair)
- {
-   const dr_with_seg_len& dr_a = alias_pair.first;
-   const dr_with_seg_len& dr_b = alias_pair.second;
- 
--  /* Check for cases in which:
--
--     (a) DR_B is always a write;
--     (b) the accesses are well-ordered in both the original and new code
--	 (see the comment above the DR_ALIAS_* flags for details); and
--     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
--  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
--    return false;
--
--  /* Check for equal (but possibly variable) steps.  */
-   tree step = DR_STEP (dr_a.dr);
--  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
--    return false;
--
--  /* Make sure that we can operate on sizetype without loss of precision.  */
-   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
--  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
--    return false;
- 
-   /* All addresses involved are known to have a common alignment ALIGN.
-      We can therefore subtract ALIGN from an exclusive endpoint to get
-@@ -2370,9 +2354,6 @@ create_waw_or_war_checks (tree *cond_expr,
- 			       fold_convert (ssizetype, indicator),
- 			       ssize_int (0));
- 
--  /* Get lengths in sizetype.  */
--  tree seg_len_a
--    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
-   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
- 
-   /* Each access has the following pattern:
-@@ -2479,6 +2460,50 @@ create_waw_or_war_checks (tree *cond_expr,
-   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
-   if (dump_enabled_p ())
-     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
-+}
-+
-+/* This is a wrapper function for create_waw_or_war_checks2.  */
-+static bool
-+create_waw_or_war_checks (tree *cond_expr,
-+			  const dr_with_seg_len_pair_t &alias_pair)
-+{
-+  const dr_with_seg_len& dr_a = alias_pair.first;
-+  const dr_with_seg_len& dr_b = alias_pair.second;
-+
-+  /* Check for cases in which:
-+
-+     (a) DR_B is always a write;
-+     (b) the accesses are well-ordered in both the original and new code
-+     (see the comment above the DR_ALIAS_* flags for details); and
-+     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
-+  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
-+    return false;
-+
-+  /* Check for equal (but possibly variable) steps.  */
-+  tree step = DR_STEP (dr_a.dr);
-+  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
-+    return false;
-+
-+  /* Make sure that we can operate on sizetype without loss of precision.  */
-+  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
-+  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
-+    return false;
-+
-+  /* Get lengths in sizetype.  */
-+  tree seg_len_a
-+    = fold_convert (sizetype,
-+		    rewrite_to_non_trapping_overflow (dr_a.seg_len));
-+  create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair);
-+  if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2)
-+    {
-+      tree seg_len2_a
-+	= fold_convert (sizetype,
-+			rewrite_to_non_trapping_overflow (dr_a.seg_len2));
-+      tree cond_expr2;
-+      create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair);
-+      *cond_expr =  fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-+				 *cond_expr, cond_expr2);
-+   }
-   return true;
- }
- 
-diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
-index f643a95b2..9bc5f16ee 100644
---- a/gcc/tree-data-ref.h
-+++ b/gcc/tree-data-ref.h
-@@ -213,12 +213,19 @@ class dr_with_seg_len
- public:
-   dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
- 		   unsigned int a)
--    : dr (d), seg_len (len), access_size (size), align (a) {}
--
-+    : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a)
-+    {}
-+  dr_with_seg_len (data_reference_p d, tree len, tree len2,
-+		   unsigned HOST_WIDE_INT size, unsigned int a)
-+    : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a)
-+    {}
-   data_reference_p dr;
-   /* The offset of the last access that needs to be checked minus
-      the offset of the first.  */
-   tree seg_len;
-+  /* The second version of segment length.  Currently this is used to
-+     soften checks for a small number of iterations.  */
-+  tree seg_len2;
-   /* A value that, when added to abs (SEG_LEN), gives the total number of
-      bytes in the segment.  */
-   poly_uint64 access_size;
-diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
-index 4e615b80b..04e68f621 100644
---- a/gcc/tree-vect-data-refs.cc
-+++ b/gcc/tree-vect-data-refs.cc
-@@ -3646,6 +3646,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
-     {
-       poly_uint64 lower_bound;
-       tree segment_length_a, segment_length_b;
-+      tree segment_length2_a, segment_length2_b;
-       unsigned HOST_WIDE_INT access_size_a, access_size_b;
-       unsigned int align_a, align_b;
-

_service:tar_scm:0050-Port-IPA-prefetch-to-GCC-12.patch Deleted

@@ -1,2071 +0,0 @@
-From 7ee50ce44c652e21ca8ad33dc4e175f02b51b072 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Fri, 8 Mar 2024 06:50:39 +0800
-Subject: [PATCH 18/18] Port IPA prefetch to GCC 12
-
----
- gcc/Makefile.in     |    1 +
- gcc/cgraph.cc       |    1 +
- gcc/cgraph.h        |    2 +
- gcc/common.opt      |    8 +
- gcc/ipa-devirt.cc   |   54 +-
- gcc/ipa-prefetch.cc | 1819 +++++++++++++++++++++++++++++++++++++++++++
- gcc/ipa-sra.cc      |    8 +
- gcc/params.opt      |    8 +
- gcc/passes.def      |    1 +
- gcc/timevar.def     |    1 +
- gcc/tree-pass.h     |    1 +
- 11 files changed, 1902 insertions(+), 2 deletions(-)
- create mode 100644 gcc/ipa-prefetch.cc
-
-diff --git a/gcc/Makefile.in b/gcc/Makefile.in
-index 876000bda..10544e4a9 100644
---- a/gcc/Makefile.in
-+++ b/gcc/Makefile.in
-@@ -1468,6 +1468,7 @@ OBJS = \
- 	ipa-modref.o \
- 	ipa-modref-tree.o \
- 	ipa-predicate.o \
-+	ipa-prefetch.o \
- 	ipa-profile.o \
- 	ipa-prop.o \
- 	ipa-param-manipulation.o \
-diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
-index 3734c85db..7d738b891 100644
---- a/gcc/cgraph.cc
-+++ b/gcc/cgraph.cc
-@@ -998,6 +998,7 @@ cgraph_node::create_indirect_edge (gcall *call_stmt, int ecf_flags,
-   edge->indirect_info = cgraph_allocate_init_indirect_info ();
-   edge->indirect_info->ecf_flags = ecf_flags;
-   edge->indirect_info->vptr_changed = true;
-+  edge->indirect_info->targets = NULL;
- 
-   /* Record polymorphic call info.  */
-   if (!cloning_p
-diff --git a/gcc/cgraph.h b/gcc/cgraph.h
-index d96690326..b84ff2f98 100644
---- a/gcc/cgraph.h
-+++ b/gcc/cgraph.h
-@@ -1659,6 +1659,8 @@ public:
-   int param_index;
-   /* ECF flags determined from the caller.  */
-   int ecf_flags;
-+  /* Vector of potential call targets determined by analysis.  */
-+  vec<cgraph_node *, va_gc_atomic> *targets;
- 
-   /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES.  */
-   unsigned num_speculative_call_targets : 16;
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 1eb62ada5..e65a06af9 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -1328,6 +1328,10 @@ fdevirtualize
- Common Var(flag_devirtualize) Optimization
- Try to convert virtual calls to direct ones.
- 
-+fipa-ic
-+Common Var(flag_ipa_ic) Optimization Init(0)
-+Perform interprocedural analysis of indirect calls.
-+
- ficp
- Common Var(flag_icp) Optimization Init(0)
- Try to promote indirect calls to direct ones.
-@@ -2367,6 +2371,10 @@ fprefetch-loop-arrays
- Common Var(flag_prefetch_loop_arrays) Init(-1) Optimization
- Generate prefetch instructions, if available, for arrays in loops.
- 
-+fipa-prefetch
-+Common Var(flag_ipa_prefetch) Init(0) Optimization
-+Generate prefetch instructions, if available, using IPA info.
-+
- fprofile
- Common Var(profile_flag)
- Enable basic program profiling code.
-diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
-index 318535d06..dd3562d56 100644
---- a/gcc/ipa-devirt.cc
-+++ b/gcc/ipa-devirt.cc
-@@ -5758,6 +5758,54 @@ merge_fs_map_for_ftype_aliases ()
-     }
- }
- 
-+/* Save results of indirect call analysis for the next passes.  */
-+
-+static void
-+save_analysis_results ()
-+{
-+  if (dump_file)
-+    fprintf (dump_file, "\n\nSave results of indirect call analysis.\n");
-+
-+  struct cgraph_node *n;
-+  FOR_EACH_FUNCTION (n)
-+    {
-+      cgraph_edge *e, *next;
-+      for (e = n->indirect_calls; e; e = next)
-+	{
-+	  next = e->next_callee;
-+	  if (e->indirect_info->polymorphic)
-+	    continue;
-+	  gcall *stmt = e->call_stmt;
-+	  gcc_assert (stmt != NULL);
-+	  tree call_fn = gimple_call_fn (stmt);
-+	  tree call_fn_ty = TREE_TYPE (call_fn);
-+	  if (!POINTER_TYPE_P (call_fn_ty))
-+	    continue;
-+
-+	  tree ctype = TYPE_CANONICAL (TREE_TYPE (call_fn_ty));
-+	  unsigned ctype_uid = ctype ? TYPE_UID (ctype) : 0;
-+	  if (!ctype_uid || unsafe_types->count (ctype_uid)
-+	      || !fs_map->count (ctype_uid))
-+	    continue;
-+	  /* TODO: cleanup noninterposable aliases.  */
-+	  decl_set *decls = (*fs_map)ctype_uid;
-+	  if (dump_file)
-+	    {
-+	      fprintf (dump_file, "For call ");
-+	      print_gimple_stmt (dump_file, stmt, 0);
-+	    }
-+	  vec_alloc (e->indirect_info->targets, decls->size ());
-+	  for (decl_set::const_iterator it = decls->begin ();
-+	       it != decls->end (); it++)
-+	    {
-+	      struct cgraph_node *target = cgraph_node::get (*it);
-+	      /* TODO: maybe discard some targets.  */
-+	      e->indirect_info->targets->quick_push (target);
-+	    }
-+	}
-+    }
-+}
-+
- /* Dump function types with set of functions corresponding to it.  */
- 
- static void
-@@ -5822,6 +5870,8 @@ collect_function_signatures ()
- 	}
-     }
-   merge_fs_map_for_ftype_aliases ();
-+  if (flag_ipa_ic)
-+    save_analysis_results ();
-   if (dump_file)
-     dump_function_signature_sets ();
- }
-@@ -6217,7 +6267,7 @@ ipa_icp (void)
-      optimize indirect calls.  */
-   collect_function_type_aliases ();
-   collect_function_signatures ();
--  bool optimized = optimize_indirect_calls ();
-+  bool optimized = flag_icp ? optimize_indirect_calls () : false;
- 
-   remove_type_alias_map (ta_map);
-   remove_type_alias_map (fta_map);
-@@ -6264,7 +6314,7 @@ public:
-   /* opt_pass methods: */
-   virtual bool gate (function *)
-     {
--      return (optimize && flag_icp && !seen_error ()
-+      return (optimize && (flag_icp || flag_ipa_ic) && !seen_error ()
- 	      && (in_lto_p || flag_whole_program));
-     }
- 
-diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
-new file mode 100644
-index 000000000..aeea51105
---- /dev/null
-+++ b/gcc/ipa-prefetch.cc
-@@ -0,0 +1,1819 @@
-+/* IPA prefetch optimizations.
-+   Copyright (C) 2023 Free Software Foundation, Inc.
-+   Contributed by Ilia Diachkov.
-+
-+This file is part of GCC.
-+
-+GCC is free software; you can redistribute it and/or modify it under
-+the terms of the GNU General Public License as published by the Free
-+Software Foundation; either version 3, or (at your option) any later
-+version.
-+
-+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-+WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-+for more details.
-+
-+You should have received a copy of the GNU General Public License
-+along with GCC; see the file COPYING3.  If not see
-+<http://www.gnu.org/licenses/>.  */
-+
-+/* IPA prefetch is an interprocedural pass that detects cases of indirect
-+   memory access potentially in loops and inserts prefetch instructions
-+   to optimize cache usage during these indirect memory accesses.  */
-+

_service:tar_scm:0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch Deleted

@@ -1,2216 +0,0 @@
-From 4c262af8e178ac7c81b32be5b159b4d09a5841c9 Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Fri, 8 Mar 2024 07:07:50 +0800
-Subject: [PATCH 1/2] Port fixes for IPA prefetch to GCC 12
-
----
- gcc/ipa-devirt.cc                          |    9 +-
- gcc/ipa-prefetch.cc                        |  174 +-
- gcc/ipa-sra.cc                             |    7 +
- gcc/params.opt                             |    4 +-
- gcc/testsuite/gcc.dg/completion-1.c        |    1 +
- gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c | 1843 ++++++++++++++++++++
- 6 files changed, 1974 insertions(+), 64 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
-
-diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
-index dd3562d56..dd000b401 100644
---- a/gcc/ipa-devirt.cc
-+++ b/gcc/ipa-devirt.cc
-@@ -5029,9 +5029,12 @@ analyze_assign_stmt (gimple *stmt)
- 	}
-       else
- 	{
--	  fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
--		   get_tree_code_name (TREE_CODE (rhs)));
--	  print_gimple_stmt (dump_file, stmt, 0);
-+	  if (dump_file && (dump_flags & TDF_DETAILS))
-+	    {
-+	      fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
-+		       get_tree_code_name (TREE_CODE (rhs)));
-+	      print_gimple_stmt (dump_file, stmt, 0);
-+	    }
- 	  gcc_unreachable ();
- 	}
-     }
-diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
-index aeea51105..9537e4835 100644
---- a/gcc/ipa-prefetch.cc
-+++ b/gcc/ipa-prefetch.cc
-@@ -167,6 +167,7 @@ analyse_cgraph ()
- 	}
- 
-       /* TODO: maybe remove loop info here.  */
-+      n->get_body ();
-       push_cfun (DECL_STRUCT_FUNCTION (n->decl));
-       calculate_dominance_info (CDI_DOMINATORS);
-       loop_optimizer_init (LOOPS_NORMAL);
-@@ -942,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
-       (*mr_candidate_map)mr = mr2;
-       return;
-     }
-+  /* Probably we shouldn't leave nulls in the map.  */
-+  if ((*mr_candidate_map)mr == NULL)
-+    return;
-   /* TODO: support analysis with incrementation of different fields.  */
-   if ((*mr_candidate_map)mr->offset != mr2->offset)
-     {
-@@ -1090,6 +1094,15 @@ analyse_loops ()
- 	  memref_t *mr = it->first, *mr2 = it->second;
- 	  if (mr2 == NULL || !(*fmrs_map)fn->count (mr))
- 	    continue;
-+	  /* For now optimize only MRs whose mem is a MEM_REF.
-+	     TODO: support other MR types.  */
-+	  if (TREE_CODE (mr->mem) != MEM_REF)
-+	    {
-+	      if (dump_file)
-+		fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
-+			 mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
-+	      continue;
-+	    }
- 	  if (!optimize_mrs_map->count (fn))
- 	    (*optimize_mrs_map)fn = new memref_set;
- 	  (*optimize_mrs_map)fn->insert (mr);
-@@ -1102,7 +1115,7 @@ analyse_loops ()
- 	       it != (*optimize_mrs_map)fn->end (); it++)
- 	    {
- 	      memref_t *mr = *it, *mr2 = (*mr_candidate_map)mr;
--	      fprintf (dump_file, "MRs %d,%d with incremental offset ",
-+	      fprintf (dump_file, "MRs %d, %d with incremental offset ",
- 		       mr->mr_id, mr2->mr_id);
- 	      print_generic_expr (dump_file, mr2->offset);
- 	      fprintf (dump_file, "\n");
-@@ -1435,6 +1448,52 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
-   return NULL_TREE;
- }
- 
-+/* Copy stmt and remap its operands.  */
-+
-+static gimple *
-+gimple_copy_and_remap (gimple *stmt)
-+{
-+  gimple *copy = gimple_copy (stmt);
-+  gcc_checking_assert (!is_gimple_debug (copy));
-+
-+  /* Remap all the operands in COPY.  */
-+  struct walk_stmt_info wi;
-+  memset (&wi, 0, sizeof (wi));
-+  wi.info = copy;
-+  walk_gimple_op (copy, remap_gimple_op_r, &wi);
-+  if (dump_file)
-+    {
-+      fprintf (dump_file, "Stmt copy after remap:\n");
-+      print_gimple_stmt (dump_file, copy, 0);
-+    }
-+  return copy;
-+}
-+
-+/* Copy and remap stmts listed in MR in reverse order to last_idx, skipping
-+   processed ones.  Insert new stmts to the sequence.  */
-+
-+static gimple *
-+gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
-+				    int last_idx, stmt_set &processed)
-+{
-+  gimple *last_stmt = NULL;
-+  for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
-+    {
-+      if (processed.count (mr->stmtsi))
-+	continue;
-+      processed.insert (mr->stmtsi);
-+      if (dump_file)
-+	{
-+	  fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
-+		   i, mr->mr_id);
-+	  print_gimple_stmt (dump_file, mr->stmtsi, 0);
-+	}
-+      last_stmt = gimple_copy_and_remap (mr->stmtsi);
-+      gimple_seq_add_stmt (&stmts, last_stmt);
-+  }
-+  return last_stmt;
-+}
-+
- static void
- create_cgraph_edge (cgraph_node *n, gimple *stmt)
- {
-@@ -1490,6 +1549,13 @@ optimize_function (cgraph_node *n, function *fn)
- 		 "Skip the case.\n");
-       return 0;
-     }
-+  if (!tree_fits_shwi_p (inc_mr->step))
-+    {
-+      if (dump_file)
-+	fprintf (dump_file, "Cannot represent incremental MR's step as "
-+		 "integer.  Skip the case.\n");
-+      return 0;
-+    }
-   if (dump_file && !used_mrs.empty ())
-     print_mrs_ids (used_mrs, "Common list of used mrs:\n");
- 
-@@ -1539,16 +1605,44 @@ optimize_function (cgraph_node *n, function *fn)
-       return 0;
-     }
-   else if (dump_file)
--    fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
-+    {
-+      fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
-+      gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
-+      fprintf (dump_file, "\n");
-+    }
- 
--  split_block (dom_bb, (gimple *) NULL);
-+  /* Try to find comp_mr's stmt in the dominator bb.  */
-+  gimple *last_used = NULL;
-+  for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
-+       gsi_prev (&si))
-+    if (comp_mr->stmts0 == gsi_stmt (si))
-+      {
-+	last_used = gsi_stmt (si);
-+	if (dump_file)
-+	  {
-+	    fprintf (dump_file, "Last used stmt in dominator bb:\n");
-+	    print_gimple_stmt (dump_file, last_used, 0);
-+	  }
-+	break;
-+      }
-+
-+  split_block (dom_bb, last_used);
-   gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
- 
-   /* Create new inc var.  Insert new_var = old_var + step * factor.  */
-   decl_map = new tree_map;
-   gcc_assert (comp_mr->stmts0 && gimple_assign_single_p (comp_mr->stmts0));
-   tree inc_var = gimple_assign_lhs (comp_mr->stmts0);
-+  /* If old_var definition dominates the current use, just use it, otherwise
-+     evaluate it just before new inc var evaluation.  */
-   gimple_seq stmts = NULL;
-+  stmt_set processed_stmts;
-+  if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts0)))
-+    {
-+      gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
-+							processed_stmts);
-+      inc_var = gimple_assign_lhs (tmp);
-+    }
-   tree var_type = TREE_TYPE (inc_var);
-   enum tree_code inc_code;
-   if (TREE_CODE (var_type) == POINTER_TYPE)
-@@ -1556,52 +1650,28 @@ optimize_function (cgraph_node *n, function *fn)
-   else
-     inc_code = PLUS_EXPR;

_service:tar_scm:0052-Fix-fails-in-IPA-prefetch-src-openEuler-gcc-I96ID7.patch Deleted

@@ -1,94 +0,0 @@
-From 0263daa1312d0cdcdf9c770bcf5d982a2d4fc16b Mon Sep 17 00:00:00 2001
-From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
-Date: Fri, 29 Mar 2024 17:15:41 +0800
-Subject: [PATCH 2/2] Fix fails in IPA prefetch (src-openEuler/gcc: I96ID7)
-
----
- gcc/ipa-prefetch.cc | 28 ++++++++++++++++++++++++++--
- 1 file changed, 26 insertions(+), 2 deletions(-)
-
-diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
-index 9537e4835..1ceb5137f 100644
---- a/gcc/ipa-prefetch.cc
-+++ b/gcc/ipa-prefetch.cc
-@@ -366,6 +366,7 @@ typedef std::map<memref_t *, memref_t *> memref_map;
- typedef std::map<memref_t *, tree> memref_tree_map;
- 
- typedef std::set<gimple *> stmt_set;
-+typedef std::set<tree> tree_set;
- typedef std::map<tree, tree> tree_map;
- 
- tree_memref_map *tm_map;
-@@ -1124,8 +1125,21 @@ analyse_loops ()
-     }
- }
- 
-+/* Compare memrefs by IDs; helper for qsort.  */
-+
-+static int
-+memref_id_cmp (const void *p1, const void *p2)
-+{
-+  const memref_t *mr1 = *(const memref_t **) p1;
-+  const memref_t *mr2 = *(const memref_t **) p2;
-+
-+  if ((unsigned) mr1->mr_id > (unsigned) mr2->mr_id)
-+    return 1;
-+  return -1;
-+}
-+
- /* Reduce the set filtering out memrefs with the same memory references,
--   return the result vector of memrefs.  */
-+   sort and return the result vector of memrefs.  */
- 
- static void
- reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
-@@ -1162,6 +1176,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
- 	    vec.safe_push (mr1);
- 	}
-     }
-+  vec.qsort (memref_id_cmp);
-   if (dump_file)
-     {
-       fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ());
-@@ -1663,10 +1678,15 @@ optimize_function (cgraph_node *n, function *fn)
-     }
- 
-   /* Create other new vars.  Insert new stmts.  */
-+  vec<memref_t *> used_mr_vec = vNULL;
-   for (memref_set::const_iterator it = used_mrs.begin ();
-        it != used_mrs.end (); it++)
-+    used_mr_vec.safe_push (*it);
-+  used_mr_vec.qsort (memref_id_cmp);
-+
-+  for (unsigned int j = 0; j < used_mr_vec.length (); j++)
-     {
--      memref_t *mr = *it;
-+      memref_t *mr = used_mr_vecj;
-       if (mr == comp_mr)
- 	continue;
-       gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
-@@ -1702,6 +1722,7 @@ optimize_function (cgraph_node *n, function *fn)
-       local = integer_three_node;
-       break;
-     }
-+  tree_set prefetched_addrs;
-   for (unsigned int j = 0; j < vmrs.length (); j++)
-     {
-       memref_t *mr = vmrsj;
-@@ -1714,10 +1735,13 @@ optimize_function (cgraph_node *n, function *fn)
-       tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
-       if (decl_map->count (addr))
- 	addr = (*decl_map)addr;
-+      if (prefetched_addrs.count (addr))
-+	continue;
-       last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
- 				     3, addr, write_p, local);
-       pcalls.safe_push (last_stmt);
-       gimple_seq_add_stmt (&stmts, last_stmt);
-+      prefetched_addrs.insert (addr);
-       if (dump_file)
- 	{
- 	  fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
--- 
-2.33.0
-

_service:tar_scm:0053-struct-reorg-Add-Semi-Relayout.patch Deleted

@@ -1,1366 +0,0 @@
-From c2a0dcc565e0f6274f26644bd389337db8f2940c Mon Sep 17 00:00:00 2001
-From: tiancheng-bao <baotiancheng1@huawei.com>
-Date: Sat, 30 Mar 2024 11:04:23 +0800
-Subject: PATCH struct-reorg Add Semi Relayout
-
----
- gcc/common.opt                                |   6 +-
- gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 975 +++++++++++++++++-
- gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   8 +
- gcc/params.opt                                |   5 +
- .../gcc.dg/struct/semi_relayout_rewrite.c     |  86 ++
- gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |   4 +
- 6 files changed, 1040 insertions(+), 44 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c
-
-diff --git a/gcc/common.opt b/gcc/common.opt
-index 38f1e457d..9484df5ad 100644
---- a/gcc/common.opt
-+++ b/gcc/common.opt
-@@ -2010,9 +2010,9 @@ Common Var(flag_ipa_struct_reorg) Init(0) Optimization
- Perform structure layout optimizations.
- 
- fipa-struct-reorg=
--Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5)
---fipa-struct-reorg=0,1,2,3,4,5 adding none, struct-reorg, reorder-fields,
--dfe, safe-pointer-compression, unsafe-pointer-compression optimizations.
-+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 6)
-+-fipa-struct-reorg=0,1,2,3,4,5,6 adding none, struct-reorg, reorder-fields,
-+dfe, safe-pointer-compression, unsafe-pointer-compression, semi-relayout optimizations.
- 
- fipa-vrp
- Common Var(flag_ipa_vrp) Optimization
-diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-index 3922873f3..6a202b4bd 100644
---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-@@ -294,7 +294,8 @@ enum struct_layout_opt_level
-   STRUCT_REORDER_FIELDS = 1 << 2,
-   DEAD_FIELD_ELIMINATION = 1 << 3,
-   POINTER_COMPRESSION_SAFE = 1 << 4,
--  POINTER_COMPRESSION_UNSAFE = 1 << 5
-+  POINTER_COMPRESSION_UNSAFE = 1 << 5,
-+  SEMI_RELAYOUT = 1 << 6
- };
- 
- /* Defines the target pointer size of compressed pointer, which should be 8,
-@@ -308,6 +309,7 @@ void get_base (tree &base, tree expr);
- 
- static unsigned int current_layout_opt_level;
- hash_map<tree, tree> replace_type_map;
-+hash_map<tree, tree> semi_relayout_map;
- 
- /* Return true if one of these types is created by struct-reorg.  */
- 
-@@ -426,7 +428,9 @@ srtype::srtype (tree type)
-     visited (false),
-     pc_candidate (false),
-     has_legal_alloc_num (false),
--    has_alloc_array (0)
-+    has_alloc_array (0),
-+    semi_relayout (false),
-+    bucket_parts (0)
- {
-   for (int i = 0; i < max_split; i++)
-     newtypei = NULL_TREE;
-@@ -891,6 +895,66 @@ srfield::create_new_reorder_fields (tree newtypemax_split,
-   newfield0 = field;
- }
- 
-+/* Given a struct s whose fields have already been reordered by size, we try to
-+   combine fields smaller than 8 bytes into 8-byte groups.  Example:
-+   struct s {
-+     uint64_t a,
-+     uint32_t b,
-+     uint32_t c,
-+     uint32_t d,
-+     uint16_t e,
-+     uint8_t f
-+   }
-+
-+   We allocate memory for arrays of struct S, before semi-relayout, their
-+   layout in memory is shown as below:
-+   a,b,c,d,e,f,padding;a,b,c,d,e,f,padding;...
-+
-+   During semi-relayout, we put a number of structs into a same region called
-+   bucket.  The number is determined by param relayout-bucket-capacity-level.
-+   Using 1024 here as example.  After semi-relayout, the layout in a bucket is
-+   shown as below:
-+   part1 a;a;a...
-+   part2 b,c;b,c;b,c;...
-+   part3 d,e,f,pad;d,e,f,pad;d,e,f,pad;...
-+
-+   In the last bucket, if the number of remaining structs is less than the
-+   capacity of a bucket, the rest of the allocated memory will be wasted as
-+   padding.  */
-+
-+unsigned
-+srtype::calculate_bucket_size ()
-+{
-+  unsigned parts = 0;
-+  unsigned bit_sum = 0;
-+  unsigned relayout_offset = 0;
-+  /* Currently, limit each 8-byte part to at most 2 fields.  */
-+  unsigned curr_part_num = 0;
-+  unsigned field_num = 0;
-+  for (tree f = TYPE_FIELDS (newtype0); f; f = DECL_CHAIN (f))
-+    {
-+      unsigned size = TYPE_PRECISION (TREE_TYPE (f));
-+      bit_sum += size;
-+      field_num++;
-+      if (++curr_part_num > 2 || bit_sum > 64)
-+	{
-+	  bit_sum = size;
-+	  parts++;
-+	  relayout_offset = relayout_part_size * parts;
-+	  curr_part_num = 1;
-+	}
-+      else
-+	{
-+	  relayout_offset = relayout_part_size * parts + (bit_sum - size) / 8;
-+	}
-+      new_field_offsets.put (f, relayout_offset);
-+    }
-+  /* Do not relayout a struct with only one field after DFE.  */
-+  if (field_num == 1)
-+    return 0;
-+  bucket_parts = ++parts;
-+  return parts * relayout_part_size;
-+}
-+
- /* Create the new TYPE corresponding to THIS type.  */
- 
- bool
-@@ -1001,6 +1065,15 @@ srtype::create_new_type (void)
-   if (pc_candidate && pc_gptr == NULL_TREE)
-     create_global_ptr_for_pc ();
- 
-+  if (semi_relayout)
-+    {
-+      bucket_size = calculate_bucket_size ();
-+      if (bucket_size == 0)
-+	return false;
-+      if (semi_relayout_map.get (this->newtype0) == NULL)
-+	semi_relayout_map.put (this->newtype0, this->type);
-+    }
-+
-   if (dump_file && (dump_flags & TDF_DETAILS))
-     {
-       fprintf (dump_file, "Created %d types:\n", maxclusters);
-@@ -1393,7 +1466,7 @@ public:
- 		       bool should_create = false, bool can_escape = false);
-   bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t);
- 
--  void check_alloc_num (gimple *stmt, srtype *type);
-+  void check_alloc_num (gimple *stmt, srtype *type, bool ptrptr);
-   void check_definition_assign (srdecl *decl, vec<srdecl *> &worklist);
-   void check_definition_call (srdecl *decl, vec<srdecl *> &worklist);
-   void check_definition (srdecl *decl, vec<srdecl *> &);
-@@ -1442,6 +1515,33 @@ public:
- 						  tree &);
-   basic_block create_bb_for_compress_nullptr (basic_block, tree &);
-   basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &);
-+
-+   // Semi-relayout methods:
-+  bool is_semi_relayout_candidate (tree);
-+  srtype *get_semi_relayout_candidate_type (tree);
-+  void check_and_prune_struct_for_semi_relayout (void);
-+  tree rewrite_pointer_diff (gimple_stmt_iterator *, tree, tree, srtype *);
-+  tree rewrite_pointer_plus_integer (gimple *, gimple_stmt_iterator *, tree,
-+				     tree, srtype *);
-+  tree build_div_expr (gimple_stmt_iterator *, tree, tree);
-+  tree get_true_pointer_base (gimple_stmt_iterator *, tree, srtype *);
-+  tree get_real_allocated_ptr (tree, gimple_stmt_iterator *);
-+  tree set_ptr_for_use (tree, gimple_stmt_iterator *);
-+  void record_allocated_size (tree, gimple_stmt_iterator *, tree);
-+  tree read_allocated_size (tree, gimple_stmt_iterator *);
-+  gimple *create_aligned_alloc (gimple_stmt_iterator *, srtype *, tree,
-+				tree &);
-+  void create_memset_zero (tree, gimple_stmt_iterator *, tree);
-+  void create_memcpy (tree, tree, tree, gimple_stmt_iterator *);
-+  void create_free (tree, gimple_stmt_iterator *);
-+  void copy_to_lhs (tree, tree, gimple_stmt_iterator *);
-+  srtype *get_relayout_candidate_type (tree);
-+  long unsigned int get_true_field_offset (srfield *, srtype *);
-+  tree rewrite_address (tree, srfield *, srtype *, gimple_stmt_iterator *);
-+  bool check_sr_copy (gimple *);
-+  void relayout_field_copy (gimple_stmt_iterator *, gimple *, tree, tree,
-+			    tree&, tree &);
-+  bool do_semi_relayout (gimple_stmt_iterator *, gimple *, tree &, tree &);
- };
- 
- struct ipa_struct_relayout
-@@ -4355,7 +4455,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
- }
- 
- void
--ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type)
-+ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type, bool ptrptr)
- {
-   if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT

_service:tar_scm:0054-Struct-Reorg-Bugfix-for-structure-pointer-compressio.patch Deleted

_service:tar_scm:0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch Deleted

@@ -1,420 +0,0 @@
-From 55c547748af36ffc3f2d5ed154a91fb3fcb8431c Mon Sep 17 00:00:00 2001
-From: Mingchuan Wu <wumingchuan1992@foxmail.com>
-Date: Thu, 11 Apr 2024 15:49:59 +0800
-Subject: [PATCH] [Struct Reorg] Port bugfixes to GCC 12.3.1
-
-Migrated from commits in GCC10.3.1:
-https://gitee.com/openeuler/gcc/commit/41af6d361a6d85ef4fce8a8438113d765596afdd
-https://gitee.com/openeuler/gcc/commit/25d74b98caeaae881e374924886ee664aa1af5bc
-https://gitee.com/openeuler/gcc/commit/b5a3bfe92f96cd0d2224d80ac4eaa80dab1bd6bf
-https://gitee.com/openeuler/gcc/commit/708ffe6f132ee39441b66b6ab6b98847d35916b7
-https://gitee.com/openeuler/gcc/commit/e875e4e7f3716aa268ffbbf55ee199ec82b6aeba
----
- gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 97 ++++++++++---------
- gcc/testsuite/gcc.dg/struct/dfe_escape.c      | 50 ++++++++++
- gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c    | 69 +++++++++++++
- gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |  2 +
- gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 ++++++
- gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 +++
- gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 +++++
- 7 files changed, 243 insertions(+), 46 deletions(-)
- create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
- create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
-
-diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-index 6a202b4bd..f03d1d875 100644
---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-@@ -466,10 +466,19 @@ srtype::has_dead_field (void)
-   unsigned i;
-   FOR_EACH_VEC_ELT (fields, i, this_field)
-     {
--      if (!(this_field->field_access & READ_FIELD))
--	{
--	  may_dfe = true;
--	  break;
-+      /* Function pointer members are not processed, because DFE
-+         does not currently support accurate analysis of function
-+         pointers, and we have not identified specific use cases. */
-+      if (!(this_field->field_access & READ_FIELD)
-+	 && !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
-+	{
-+	  /* Fields with escape risks should not be processed. */
-+	  if (this_field->type == NULL
-+	      || (this_field->type->escapes == does_not_escape))
-+	    {
-+	      may_dfe = true;
-+	      break;
-+	    }
- 	}
-     }
-   return may_dfe;
-@@ -1032,8 +1041,13 @@ srtype::create_new_type (void)
-     {
-       srfield *f = fields[i];
-       if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
--	  && !(f->field_access & READ_FIELD))
--	continue;
-+	  && !(f->field_access & READ_FIELD)
-+	  && !FUNCTION_POINTER_TYPE_P (f->fieldtype))
-+	{
-+	  /* Fields with escape risks should not be processed. */
-+	  if (f->type == NULL || (f->type->escapes == does_not_escape))
-+	    continue;
-+	}
-       f->create_new_fields (newtype, newfields, newlast);
-     }
- 
-@@ -3815,9 +3829,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other,
-       if (VOID_POINTER_P (TREE_TYPE (side))
- 	  && TREE_CODE (side) == SSA_NAME)
- 	{
--	  /* The type is other, the declaration is side.  */
--	  current_function->record_decl (type, side, -1,
--		isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
-+	  tree inner = SSA_NAME_VAR (side);
-+	  if (inner)
-+	    {
-+	      srdecl *in = find_decl (inner);
-+	      if (in && !in->type->has_escaped ())
-+		{
-+		  /* The type is other, the declaration is side.  */
-+		  current_function->record_decl (type, side, -1,
-+			isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
-+		}
-+	     }
- 	}
-       else
- 	/* *_1 = &MEM(void *)&x + 8B.  */
-@@ -3910,6 +3932,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
- 	maybe_mark_or_record_other_side (rhs, lhs, stmt);
-       if (TREE_CODE (lhs) == SSA_NAME)
- 	maybe_mark_or_record_other_side (lhs, rhs, stmt);
-+
-+      /* Handle missing ARRAY_REF cases.  */
-+      if (TREE_CODE (lhs) == ARRAY_REF)
-+	mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
-+      if (TREE_CODE (rhs) == ARRAY_REF)
-+	mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
-     }
- }
- 
-@@ -5272,8 +5300,11 @@ ipa_struct_reorg::record_accesses (void)
- 	record_function (cnode);
-       else
- 	{
--	  tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
--	  mark_type_as_escape (return_type, escape_return, NULL);
-+	  if (cnode->externally_visible)
-+	    {
-+	      tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
-+	      mark_type_as_escape (return_type, escape_return, NULL);
-+	    }
- 	}
- 
-     }
-@@ -5889,6 +5920,7 @@ ipa_struct_reorg::rewrite_expr (tree expr,
-   bool escape_from_base = false;
- 
-   tree newbase[max_split];
-+  memset (newbase, 0, sizeof (tree[max_split]));
-   memset (newexpr, 0, sizeof (tree[max_split]));
- 
-   if (TREE_CODE (expr) == CONSTRUCTOR)
-@@ -6912,7 +6944,7 @@ create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr,
- }
- 
- tree
--ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt,
-+ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt ATTRIBUTE_UNUSED,
- 						gimple_stmt_iterator *gsi,
- 						tree ptr, tree offset,
- 						srtype *type)
-@@ -7889,41 +7921,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
-    should be removed.  */
- 
- bool
--ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
-+ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
- {
--  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
--    /* Delete debug gimple now.  */
--    return true;
--  bool remove = false;
--  if (gimple_debug_bind_p (stmt))
--    {
--      tree var = gimple_debug_bind_get_var (stmt);
--      tree newvar[max_split];
--      if (rewrite_expr (var, newvar, true))
--	remove = true;
--      if (gimple_debug_bind_has_value_p (stmt))
--	{
--	  var = gimple_debug_bind_get_value (stmt);
--	  if (TREE_CODE (var) == POINTER_PLUS_EXPR)
--	    var = TREE_OPERAND (var, 0);
--	  if (rewrite_expr (var, newvar, true))
--	    remove = true;
--	}
--    }
--  else if (gimple_debug_source_bind_p (stmt))
--    {
--      tree var = gimple_debug_source_bind_get_var (stmt);
--      tree newvar[max_split];
--      if (rewrite_expr (var, newvar, true))
--	remove = true;
--      var = gimple_debug_source_bind_get_value (stmt);
--      if (TREE_CODE (var) == POINTER_PLUS_EXPR)
--	var = TREE_OPERAND (var, 0);
--      if (rewrite_expr (var, newvar, true))
--	remove = true;
--    }
--
--  return remove;
-+  /* In debug statements, there might be some statements that have
-+     been optimized out in gimple but left in debug gimple.  Sometimes
-+     these statements need to be analyzed to escape, but in rewrite
-+     stage it shouldn't happen.  It needs to care a lot to handle these
-+     cases but seems useless.  So now we just delete debug gimple.  */
-+  return true;
- }
- 
- /* Rewrite PHI nodes, return true if the PHI was replaced.  */
-diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
-new file mode 100644
-index 000000000..09efe8027
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
-@@ -0,0 +1,50 @@
-+/* { dg-do compile } */
-+
-+#include <stdio.h>
-+#include <stdlib.h>
-+
-+typedef struct arc arc_t;
-+typedef struct arc *arc_p;
-+
-+typedef struct network

_service:tar_scm:0056-Fix-bug-that-verifying-gimple-failed-when-reorg-leve.patch Deleted

@@ -1,27 +0,0 @@
-From fa6f80044dcebd28506e871e6e5d25e2dfd7e105 Mon Sep 17 00:00:00 2001
-From: tiancheng-bao <baotiancheng1@huawei.com>
-Date: Fri, 12 Apr 2024 15:09:28 +0800
-Subject: [PATCH 01/32] Fix bug that verifying gimple failed when reorg-level >
- 5
-
----
- gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-index f03d1d875..e08577c0c 100644
---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
-@@ -7461,6 +7461,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
- 	    continue;
- 	  tree lhs_expr = newlhs[i] ? newlhs[i] : lhs;
- 	  tree rhs_expr = newrhs[i] ? newrhs[i] : rhs;
-+	  if (!useless_type_conversion_p (TREE_TYPE (lhs_expr),
-+					  TREE_TYPE (rhs_expr)))
-+	    rhs_expr = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (lhs_expr), rhs_expr);  
- 	  gimple *newstmt = gimple_build_assign (lhs_expr, rhs_expr);
- 	  if (dump_file && (dump_flags & TDF_DETAILS))
- 	    {
--- 
-2.28.0.windows.1
-

_service:tar_scm:0057-AutoFdo-Fix-memory-leaks-in-autofdo.patch Deleted

@@ -1,90 +0,0 @@
-From 13e82fccba781b29e55a6e1934986514019b728d Mon Sep 17 00:00:00 2001
-From: zhenyu--zhao <zhaozhenyu17@huawei.com>
-Date: Sun, 24 Mar 2024 20:42:27 +0800
-Subject: [PATCH 02/32] [AutoFdo] Fix memory leaks in autofdo
-
----
- gcc/final.cc | 22 ++++++++++++++--------
- 1 file changed, 14 insertions(+), 8 deletions(-)
-
-diff --git a/gcc/final.cc b/gcc/final.cc
-index d4c4fa08f..af4e529bb 100644
---- a/gcc/final.cc
-+++ b/gcc/final.cc
-@@ -4402,12 +4402,15 @@ get_fdo_count_quality (profile_count count)
-   return profile_quality[count.quality ()];
- }
- 
--static const char *
-+/* If the function is not public, return the function_name/file_name for
-+   disambiguation of local symbols since there could be identical function
-+   names coming from identical file names.  The caller needs to free memory.  */
-+static char *
- alias_local_functions (const char *fnname)
- {
-   if (TREE_PUBLIC (cfun->decl))
-     {
--      return fnname;
-+      return concat (fnname, NULL);
-     }
-   return concat (fnname, "/", lbasename (dump_base_name), NULL);
- }
-@@ -4457,12 +4460,13 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
- 
- 	  if (callee)
- 	    {
-+	      char *func_name =
-+		      alias_local_functions (get_fnname_from_decl (callee));
- 	      fprintf (asm_out_file, "\t.string \"%x\"\n",
- 		       INSN_ADDRESSES (INSN_UID (insn)));
- 
- 	      fprintf (asm_out_file, "\t.string \"%s%s\"\n",
--		       ASM_FDO_CALLEE_FLAG,
--                       alias_local_functions (get_fnname_from_decl (callee)));
-+		       ASM_FDO_CALLEE_FLAG, func_name);
- 
-               fprintf (asm_out_file,
-                        "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
-@@ -4472,9 +4476,9 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
-                 {
-                   fprintf (dump_file, "call: %x --> %s \n",
-                            INSN_ADDRESSES (INSN_UID (insn)),
--                           alias_local_functions
--                           (get_fnname_from_decl (callee)));
-+			   func_name);
-                 }
-+	      free (func_name);
-             }
-         }
-      } 
-@@ -4547,8 +4551,9 @@ dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
- static void 
- dump_function_info_to_asm (const char *fnname)
- {
-+  char *func_name = alias_local_functions (fnname);
-   fprintf (asm_out_file, "\t.string \"%s%s\"\n",
--           ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
-+	   ASM_FDO_CALLER_FLAG, func_name);
-   fprintf (asm_out_file, "\t.string \"%s%d\"\n",
-            ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
-   fprintf (asm_out_file, "\t.string \"%s%s\"\n",
-@@ -4557,7 +4562,7 @@ dump_function_info_to_asm (const char *fnname)
-   if (dump_file)
-     {
-       fprintf (dump_file, "\n FUNC_NAME: %s\n",
--               alias_local_functions (fnname));
-+	       func_name);
-       fprintf (dump_file, " file: %s\n",
-                dump_base_name);
-       fprintf (dump_file, "profile_status: %s\n",
-@@ -4567,6 +4572,7 @@ dump_function_info_to_asm (const char *fnname)
-       fprintf (dump_file, " function_bind: %s\n",
-                simple_get_function_bind ());
-     }
-+  free (func_name);
- }
- 
- /* Dump function profile into form AutoFDO or PGO to asm.    */
--- 
-2.28.0.windows.1
-

_service:tar_scm:0058-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch Deleted

@@ -1,135 +0,0 @@
-From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Wed, 10 May 2023 15:16:58 +0800
-Subject: [PATCH 03/32] x86: Add a new option -mdaz-ftz to enable FTZ and DAZ
- flags in MXCSR.
-
-    if (mdaz-ftz)
-      link crtfastmath.o
-    else if ((Ofast || ffast-math || funsafe-math-optimizations)
-             && !mno-daz-ftz)
-      link crtfastmath.o
-    else
-      Don't link crtfastmath.o
-
-gcc/ChangeLog:
-
-	* config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o
-	whenever -mdaz-ftz is specified. Don't link crtfastmath.o
-	when -mno-daz-ftz is specified.
-	* config/i386/darwin.h (ENDFILE_SPEC): Ditto.
-	* config/i386/gnu-user-common.h
-	(GNU_USER_TARGET_MATHFILE_SPEC): Ditto.
-	* config/i386/mingw32.h (ENDFILE_SPEC): Ditto.
-	* config/i386/i386.opt (mdaz-ftz): New option.
-	* doc/invoke.texi (x86 options): Document mftz-daz.
----
- gcc/config/i386/cygwin.h          |  2 +-
- gcc/config/i386/darwin.h          |  4 ++--
- gcc/config/i386/gnu-user-common.h |  2 +-
- gcc/config/i386/i386.opt          |  4 ++++
- gcc/config/i386/mingw32.h         |  2 +-
- gcc/doc/invoke.texi               | 11 ++++++++++-
- 6 files changed, 19 insertions(+), 6 deletions(-)
-
-diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
-index d06eda369..5412c5d44 100644
---- a/gcc/config/i386/cygwin.h
-+++ b/gcc/config/i386/cygwin.h
-@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3.  If not see
- 
- #undef ENDFILE_SPEC
- #define ENDFILE_SPEC \
--  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\
-+  "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
-    %{!shared:%:if-exists(default-manifest.o%s)}\
-    %{fvtable-verify=none:%s; \
-     fvtable-verify=preinit:vtv_end.o%s; \
-diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
-index a55f6b2b8..2f773924d 100644
---- a/gcc/config/i386/darwin.h
-+++ b/gcc/config/i386/darwin.h
-@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3.  If not see
- "%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} "
- 
- #undef ENDFILE_SPEC
--#define ENDFILE_SPEC \
--  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
-+#define ENDFILE_SPEC
-+\  "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
-    %{mpc32:crtprec32.o%s} \
-    %{mpc64:crtprec64.o%s} \
-    %{mpc80:crtprec80.o%s}" TM_DESTRUCTOR
-diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h
-index 23b54c5be..3d2a33f17 100644
---- a/gcc/config/i386/gnu-user-common.h
-+++ b/gcc/config/i386/gnu-user-common.h
-@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.  If not see
- 
- /* Similar to standard GNU userspace, but adding -ffast-math support.  */
- #define GNU_USER_TARGET_MATHFILE_SPEC \
--  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
-+  "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
-    %{mpc32:crtprec32.o%s} \
-    %{mpc64:crtprec64.o%s} \
-    %{mpc80:crtprec80.o%s}"
-diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
-index fc1b944ac..498fb454d 100644
---- a/gcc/config/i386/i386.opt
-+++ b/gcc/config/i386/i386.opt
-@@ -420,6 +420,10 @@ mpc80
- Target RejectNegative
- Set 80387 floating-point precision to 80-bit.
- 
-+mdaz-ftz
-+Target
-+Set the FTZ and DAZ Flags.
-+
- mpreferred-stack-boundary=
- Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg)
- Attempt to keep stack aligned to this power of 2.
-diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
-index d3ca0cd02..ddbe6a405 100644
---- a/gcc/config/i386/mingw32.h
-+++ b/gcc/config/i386/mingw32.h
-@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3.  If not see
- 
- #undef ENDFILE_SPEC
- #define ENDFILE_SPEC \
--  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
-+  "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
-    %{!shared:%:if-exists(default-manifest.o%s)}\
-    %{fvtable-verify=none:%s; \
-     fvtable-verify=preinit:vtv_end.o%s; \
-diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
-index 2b376e0e9..3a48655e5 100644
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options.
- -m96bit-long-double  -mlong-double-64  -mlong-double-80  -mlong-double-128 @gol
- -mregparm=@var{num}  -msseregparm @gol
- -mveclibabi=@var{type}  -mvect8-ret-in-mem @gol
---mpc32  -mpc64  -mpc80  -mstackrealign @gol
-+-mpc32  -mpc64  -mpc80 -mdaz-ftz -mstackrealign @gol
- -momit-leaf-frame-pointer  -mno-red-zone  -mno-tls-direct-seg-refs @gol
- -mcmodel=@var{code-model}  -mabi=@var{name}  -maddress-mode=@var{mode} @gol
- -m32  -m64  -mx32  -m16  -miamcu  -mlarge-data-threshold=@var{num} @gol
-@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant
- loss of accuracy, typically through so-called ``catastrophic cancellation'',
- when this option is used to set the precision to less than extended precision.
- 
-+@item -mdaz-ftz
-+@opindex mdaz-ftz
-+
-+The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register
-+are used to control floating-point calculations.SSE and AVX instructions
-+including scalar and vector instructions could benefit from enabling the FTZ
-+and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags
-+when @option{-mno-daz-ftz} is specified.
-+
- @item -mstackrealign
- @opindex mstackrealign
- Realign the stack at entry.  On the x86, the @option{-mstackrealign}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0059-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch Deleted

@@ -1,65 +0,0 @@
-From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Mon, 5 Jun 2023 12:38:41 +0800
-Subject: [PATCH 04/32] Explicitly view_convert_expr mask to signed type when
- folding pblendvb builtins.
-
-Since mask < 0 will be always false for vector char when
--funsigned-char, but vpblendvb needs to check the most significant
-bit. The patch explicitly VCE to vector signed char.
-
-gcc/ChangeLog:
-
-	PR target/110108
-	* config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly
-	view_convert_expr mask to signed type when folding pblendvb
-	builtins.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/pr110108-2.c: New test.
----
- gcc/config/i386/i386.cc                    |  4 +++-
- gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
- 2 files changed, 17 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
-
-diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
-index 462dce10e..479fc6010 100644
---- a/gcc/config/i386/i386.cc
-+++ b/gcc/config/i386/i386.cc
-@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
- 	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
- 		? intSI_type_node : intDI_type_node;
- 	      type = get_same_sized_vectype (itype, type);
--	      arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
- 	    }
-+	  else
-+	    type = signed_type_for (type);
-+	  arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
- 	  tree zero_vec = build_zero_cst (type);
- 	  tree cmp_type = truth_type_for (type);
- 	  tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
-diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
-new file mode 100644
-index 000000000..2d1d2fd49
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
-+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
-+
-+#include <immintrin.h>
-+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
-+  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
-+  return Result;
-+}
-+
-+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
-+  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
-+  return Result;
-+}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0060-Make-option-mvzeroupper-independent-of-optimization-.patch Deleted

@@ -1,138 +0,0 @@
-From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Mon, 26 Jun 2023 09:50:25 +0800
-Subject: [PATCH 05/32] Make option mvzeroupper independent of optimization
- level.
-
-pass_insert_vzeroupper is under condition
-
-TARGET_AVX && TARGET_VZEROUPPER
-&& flag_expensive_optimizations && !optimize_size
-
-But the document of mvzeroupper doesn't mention the insertion
-required -O2 and above, it may confuse users when they explicitly
-use -Os -mvzeroupper.
-
-------------
-mvzeroupper
-Target Mask(VZEROUPPER) Save
-Generate vzeroupper instruction before a transfer of control flow out of
-the function.
-------------
-
-The patch moves flag_expensive_optimizations && !optimize_size to
-ix86_option_override_internal. It makes -mvzeroupper independent of
-optimization level, but still keeps the behavior of architecture
-tuning(emit_vzeroupper) unchanged.
-
-gcc/ChangeLog:
-
-	* config/i386/i386-features.cc (pass_insert_vzeroupper:gate):
-	Move flag_expensive_optimizations && !optimize_size to ..
-	* config/i386/i386-options.cc (ix86_option_override_internal):
-	.. this, it makes -mvzeroupper independent of optimization
-	level, but still keeps the behavior of architecture
-	tuning(emit_vzeroupper) unchanged.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/avx-vzeroupper-29.c: New testcase.
-	* gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
-	* gcc.target/i386/avx-vzeroupper-7.c: Ditto.
-	* gcc.target/i386/avx-vzeroupper-9.c: Ditto.
----
- gcc/config/i386/i386-features.cc                  |  3 +--
- gcc/config/i386/i386-options.cc                   |  4 +++-
- gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c |  3 ++-
- gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
- gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c  |  3 ++-
- gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c  |  3 ++-
- 6 files changed, 24 insertions(+), 6 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
-
-diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
-index 6fe41c3c2..6a2444eb6 100644
---- a/gcc/config/i386/i386-features.cc
-+++ b/gcc/config/i386/i386-features.cc
-@@ -1875,8 +1875,7 @@ public:
-   /* opt_pass methods: */
-   virtual bool gate (function *)
-     {
--      return TARGET_AVX && TARGET_VZEROUPPER
--	&& flag_expensive_optimizations && !optimize_size;
-+      return TARGET_AVX && TARGET_VZEROUPPER;
-     }
- 
-   virtual unsigned int execute (function *)
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index ff44ad4e0..74e969b68 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
-     sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
- 
-   if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
--      && TARGET_EMIT_VZEROUPPER)
-+      && TARGET_EMIT_VZEROUPPER
-+      && flag_expensive_optimizations
-+      && !optimize_size)
-     opts->x_target_flags |= MASK_VZEROUPPER;
-   if (!(opts_set->x_target_flags & MASK_STV))
-     opts->x_target_flags |= MASK_STV;
-diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
-index e694d4048..5a40e8783 100644
---- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
-+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
-@@ -16,5 +16,6 @@ foo ()
-   _mm256_zeroupper ();
- }
- 
--/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
- /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
-diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
-new file mode 100644
-index 000000000..4af637757
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
-+
-+#include <immintrin.h>
-+
-+extern __m256 x, y;
-+
-+void
-+foo ()
-+{
-+  x = y;
-+}
-+
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
-diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
-index ab6d68779..75fe58897 100644
---- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
-+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
-@@ -12,4 +12,5 @@ foo ()
-   _mm256_zeroupper ();
- }
- 
--/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
-diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
-index 974e1626a..fa0a6dfca 100644
---- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
-+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
-@@ -15,4 +15,5 @@ foo ()
-   _mm256_zeroupper ();
- }
- 
--/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
-+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
--- 
-2.28.0.windows.1
-

_service:tar_scm:0061-i386-Sync-tune_string-with-arch_string-for-target-at.patch Deleted

@@ -1,68 +0,0 @@
-From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001
-From: Hongyu Wang <hongyu.wang@intel.com>
-Date: Sun, 25 Jun 2023 09:50:21 +0800
-Subject: [PATCH 06/32] i386: Sync tune_string with arch_string for target
- attribute
-
-arch=*
-
-For function with target attribute arch=*, current logic will set its
-tune to -mtune from command line so all target_clones will get same
-tuning flags which would affect the performance for each clone. Override
-tune with arch if tune was not explicitly specified to get proper tuning
-flags for target_clones.
-
-gcc/ChangeLog:
-
-	* config/i386/i386-options.cc (ix86_valid_target_attribute_tree):
-	Override tune_string with arch_string if tune_string is not
-	explicitly specified.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/mvc17.c: New test.
-
-(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8)
----
- gcc/config/i386/i386-options.cc       |  6 +++++-
- gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++
- 2 files changed, 16 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c
-
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index 74e969b68..fb2ed942f 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
-       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
- 	opts->x_ix86_tune_string
- 	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
--      else if (orig_tune_defaulted)
-+      /* If we have explicit arch string and no tune string specified, set
-+	 tune_string to NULL and later it will be overriden by arch_string
-+	 so target clones can get proper optimization.  */
-+      else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
-+	       || orig_tune_defaulted)
- 	opts->x_ix86_tune_string = NULL;
- 
-       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
-diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c
-new file mode 100644
-index 000000000..8b83c1aec
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/mvc17.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-require-ifunc "" } */
-+/* { dg-options "-O2 -march=x86-64" } */
-+/* { dg-final { scan-assembler-times "rep mov" 1 } } */
-+
-+__attribute__((target_clones("default","arch=icelake-server")))
-+void
-+foo (char *a, char *b, int size)
-+{
-+  __builtin_memcpy (a, b, size & 0x7F);
-+}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0062-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch Deleted

@@ -1,111 +0,0 @@
-From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Tue, 20 Jun 2023 15:41:00 +0800
-Subject: [PATCH 07/32] Refine maskloadmn pattern with UNSPEC_MASKLOAD.
-
-If mem_addr points to a memory region with less than whole vector size
-bytes of accessible memory and k is a mask that would prevent reading
-the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
-it to be transformed to vpblendd.
-
-gcc/ChangeLog:
-
-	PR target/110309
-	* config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
-	Refine pattern with UNSPEC_MASKLOAD.
-	(maskload<mode><avx512fmaskmodelower>): Ditto.
-	(*<avx512>_load<mode>_mask): Extend mode iterator to
-	VI12HF_AVX512VL.
-	(*<avx512>_load<mode>): Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/pr110309.c: New test.
----
- gcc/config/i386/sse.md                   | 32 +++++++++++++-----------
- gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
- 2 files changed, 28 insertions(+), 14 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c
-
-diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
-index eb767e56c..b30e96cb1 100644
---- a/gcc/config/i386/sse.md
-+++ b/gcc/config/i386/sse.md
-@@ -1411,12 +1411,12 @@
- })
- 
- (define_insn "*<avx512>_load<mode>_mask"
--  (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
--	(vec_merge:VI12_AVX512VL
--	  (unspec:VI12_AVX512VL
--	    (match_operand:VI12_AVX512VL 1 "memory_operand" "m")
-+  (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
-+	(vec_merge:VI12HF_AVX512VL
-+	  (unspec:VI12HF_AVX512VL
-+	    (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")
- 	    UNSPEC_MASKLOAD)
--	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
-+	  (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C")
- 	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))
-   "TARGET_AVX512BW"
-   "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
-@@ -1425,9 +1425,9 @@
-    (set_attr "mode" "<sseinsnmode>"))
- 
- (define_insn_and_split "*<avx512>_load<mode>"
--  (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
--	(unspec:VI12_AVX512VL
--	  (match_operand:VI12_AVX512VL 1 "memory_operand" "m")
-+  (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
-+	(unspec:VI12HF_AVX512VL
-+	  (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")
- 	  UNSPEC_MASKLOAD))
-   "TARGET_AVX512BW"
-   "#"
-@@ -25973,17 +25973,21 @@
-   "TARGET_AVX")
- 
- (define_expand "maskload<mode><avx512fmaskmodelower>"
--  (set (match_operand:V48H_AVX512VL 0 "register_operand")
--	(vec_merge:V48H_AVX512VL
--	  (match_operand:V48H_AVX512VL 1 "memory_operand")
-+  (set (match_operand:V48_AVX512VL 0 "register_operand")
-+	(vec_merge:V48_AVX512VL
-+	  (unspec:V48_AVX512VL
-+	    (match_operand:V48_AVX512VL 1 "memory_operand")
-+	    UNSPEC_MASKLOAD)
- 	  (match_dup 0)
- 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))
-   "TARGET_AVX512F")
- 
- (define_expand "maskload<mode><avx512fmaskmodelower>"
--  (set (match_operand:VI12_AVX512VL 0 "register_operand")
--	(vec_merge:VI12_AVX512VL
--	  (match_operand:VI12_AVX512VL 1 "memory_operand")
-+  (set (match_operand:VI12HF_AVX512VL 0 "register_operand")
-+	(vec_merge:VI12HF_AVX512VL
-+	  (unspec:VI12HF_AVX512VL
-+	    (match_operand:VI12HF_AVX512VL 1 "memory_operand")
-+	    UNSPEC_MASKLOAD)
- 	  (match_dup 0)
- 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))
-   "TARGET_AVX512BW")
-diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
-new file mode 100644
-index 000000000..f6e9e9c3c
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
-@@ -0,0 +1,10 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
-+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
-+
-+
-+void foo (int * __restrict a, int *b)
-+{
-+  for (int i = 0; i < 6; ++i)
-+    a[i] = b[i] + 42;
-+}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0063-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch Deleted

@@ -1,126 +0,0 @@
-From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Mon, 26 Jun 2023 21:07:09 +0800
-Subject: [PATCH 08/32] Refine maskstore patterns with UNSPEC_MASKMOV.
-
-Similar to r14-2070-gc79476da46728e.
-
-If mem_addr points to a memory region with less than whole vector size
-bytes of accessible memory and k is a mask that would prevent reading
-the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
-it from being transformed into any other whole memory access instruction.
-
-gcc/ChangeLog:
-
-	PR rtl-optimization/110237
-	* config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
-	UNSPEC_MASKMOV.
-	(maskstore<mode><avx512fmaskmodelower>): Ditto.
-	(*<avx512>_store<mode>_mask): New define_insn, it's renamed
-	from original <avx512>_store<mode>_mask.
----
- gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
- 1 file changed, 57 insertions(+), 12 deletions(-)
-
-diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
-index b30e96cb1..3af159896 100644
---- a/gcc/config/i386/sse.md
-+++ b/gcc/config/i386/sse.md
-@@ -1554,7 +1554,7 @@
-    (set_attr "prefix" "evex")
-    (set_attr "mode" "<sseinsnmode>"))
- 
--(define_insn "<avx512>_store<mode>_mask"
-+(define_insn "*<avx512>_store<mode>_mask"
-   (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
- 	(vec_merge:V48_AVX512VL
- 	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
-@@ -1582,7 +1582,7 @@
-    (set_attr "memory" "store")
-    (set_attr "mode" "<sseinsnmode>"))
- 
--(define_insn "<avx512>_store<mode>_mask"
-+(define_insn "*<avx512>_store<mode>_mask"
-   (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
- 	(vec_merge:VI12HF_AVX512VL
- 	  (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
-@@ -26002,21 +26002,66 @@
-   "TARGET_AVX")
- 
- (define_expand "maskstore<mode><avx512fmaskmodelower>"
--  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
--	(vec_merge:V48H_AVX512VL
--	  (match_operand:V48H_AVX512VL 1 "register_operand")
--	  (match_dup 0)
--	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
-+  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
-+	(unspec:V48_AVX512VL
-+	  [(match_operand:V48_AVX512VL 1 "register_operand")
-+	   (match_dup 0)
-+	   (match_operand:<avx512fmaskmode> 2 "register_operand")]
-+	  UNSPEC_MASKMOV))]
-   "TARGET_AVX512F")
- 
- (define_expand "maskstore<mode><avx512fmaskmodelower>"
--  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
--	(vec_merge:VI12_AVX512VL
--	  (match_operand:VI12_AVX512VL 1 "register_operand")
--	  (match_dup 0)
--	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
-+  [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand")
-+	(unspec:VI12HF_AVX512VL
-+	  [(match_operand:VI12HF_AVX512VL 1 "register_operand")
-+	   (match_dup 0)
-+	   (match_operand:<avx512fmaskmode> 2 "register_operand")]
-+	  UNSPEC_MASKMOV))]
-   "TARGET_AVX512BW")
- 
-+(define_insn "<avx512>_store<mode>_mask"
-+  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
-+	(unspec:V48_AVX512VL
-+	  [(match_operand:V48_AVX512VL 1 "register_operand" "v")
-+	   (match_dup 0)
-+	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
-+	  UNSPEC_MASKMOV))]
-+  "TARGET_AVX512F"
-+{
-+  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
-+    {
-+      if (misaligned_operand (operands[0], <MODE>mode))
-+	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-+      else
-+	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-+    }
-+  else
-+    {
-+      if (misaligned_operand (operands[0], <MODE>mode))
-+	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-+      else
-+	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-+    }
-+}
-+  [(set_attr "type" "ssemov")
-+   (set_attr "prefix" "evex")
-+   (set_attr "memory" "store")
-+   (set_attr "mode" "<sseinsnmode>")])
-+
-+(define_insn "<avx512>_store<mode>_mask"
-+  [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
-+	(unspec:VI12HF_AVX512VL
-+	  [(match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
-+	   (match_dup 0)
-+	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
-+	   UNSPEC_MASKMOV))]
-+  "TARGET_AVX512BW"
-+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
-+  [(set_attr "type" "ssemov")
-+   (set_attr "prefix" "evex")
-+   (set_attr "memory" "store")
-+   (set_attr "mode" "<sseinsnmode>")])
-+
- (define_expand "cbranch<mode>4"
-   (set (reg:CC FLAGS_REG)
- 	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
--- 
-2.28.0.windows.1
-

_service:tar_scm:0064-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch Deleted

@@ -1,38 +0,0 @@
-From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001
-From: "Cui, Lili" <lili.cui@intel.com>
-Date: Thu, 29 Jun 2023 03:10:35 +0000
-Subject: [PATCH 09/32] x86: Update model values for Alderlake and Rocketlake.
-
-Update model values for Alderlake and Rocketlake according to SDM.
-
-gcc/ChangeLog
-
-	* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
-	from Rocketlake, remove model value 0xbf from Alderlake.
----
- gcc/common/config/i386/cpuinfo.h | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 0333da56b..28b2ff0b0 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
-       break;
-     case 0xa7:
--    case 0xa8:
-       /* Rocket Lake.  */
-       cpu = "rocketlake";
-       CHECK___builtin_cpu_is ("corei7");
-@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       break;
-     case 0x97:
-     case 0x9a:
--    case 0xbf:
-       /* Alder Lake.  */
-       cpu = "alderlake";
-       CHECK___builtin_cpu_is ("corei7");
--- 
-2.28.0.windows.1
-

_service:tar_scm:0065-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch Deleted

@@ -1,78 +0,0 @@
-From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Fri, 4 Aug 2023 09:27:39 +0800
-Subject: [PATCH 10/32] Workaround possible CPUID bug in Sandy Bridge.
-
-Don't access leaf 7 subleaf 1 unless subleaf 0 says it is
-supported via EAX.
-
-Intel documentation says invalid subleaves return 0. We had been
-relying on that behavior instead of checking the max subleaf number.
-
-It appears that some Sandy Bridge CPUs return at least the subleaf 0
-EDX value for subleaf 1. Best guess is that this is a bug in a
-microcode patch since all of the bits we're seeing set in EDX were
-introduced after Sandy Bridge was originally released.
-
-This is causing avxvnniint16 to be incorrectly enabled with
--march=native on these CPUs.
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h (get_available_features): Check
-	max_subleaf_level for valid subleaf before use CPUID.
----
- gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------
- 1 file changed, 17 insertions(+), 12 deletions(-)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 28b2ff0b0..316ad3cb3 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model,
-   /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
-   if (max_cpuid_level >= 7)
-     {
--      __cpuid_count (7, 0, eax, ebx, ecx, edx);
-+      unsigned int max_subleaf_level;
-+
-+      __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx);
-       if (ebx & bit_BMI)
- 	set_feature (FEATURE_BMI);
-       if (ebx & bit_SGX)
-@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model,
- 	    set_feature (FEATURE_AVX512FP16);
- 	}
- 
--      __cpuid_count (7, 1, eax, ebx, ecx, edx);
--      if (eax & bit_HRESET)
--	set_feature (FEATURE_HRESET);
--      if (avx_usable)
--	{
--	  if (eax & bit_AVXVNNI)
--	    set_feature (FEATURE_AVXVNNI);
--	}
--      if (avx512_usable)
-+      if (max_subleaf_level >= 1)
- 	{
--	  if (eax & bit_AVX512BF16)
--	    set_feature (FEATURE_AVX512BF16);
-+	  __cpuid_count (7, 1, eax, ebx, ecx, edx);
-+	  if (eax & bit_HRESET)
-+	    set_feature (FEATURE_HRESET);
-+	  if (avx_usable)
-+	    {
-+	      if (eax & bit_AVXVNNI)
-+		set_feature (FEATURE_AVXVNNI);
-+	    }
-+	  if (avx512_usable)
-+	    {
-+	      if (eax & bit_AVX512BF16)
-+		set_feature (FEATURE_AVX512BF16);
-+	    }
- 	}
-     }
- 
--- 
-2.28.0.windows.1
-

_service:tar_scm:0066-Software-mitigation-Disable-gather-generation-in-vec.patch Deleted

@@ -1,220 +0,0 @@
-From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Thu, 10 Aug 2023 11:41:39 +0800
-Subject: [PATCH 11/32] Software mitigation: Disable gather generation in
- vectorization for GDS affected Intel Processors.
-
-For more details of GDS (Gather Data Sampling), refer to
-https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html
-
-After microcode update, there's performance regression. To avoid that,
-the patch disables gather generation in autovectorization but uses
-gather scalar emulation instead.
-
-gcc/ChangeLog:
-
-	* config/i386/i386-options.cc (m_GDS): New macro.
-	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't
-	enable for m_GDS.
-	(X86_TUNE_USE_GATHER_4PARTS): Ditto.
-	(X86_TUNE_USE_GATHER): Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/avx2-gather-2.c: Adjust options to keep
-	gather vectorization.
-	* gcc.target/i386/avx2-gather-6.c: Ditto.
-	* gcc.target/i386/avx512f-pr88464-1.c: Ditto.
-	* gcc.target/i386/avx512f-pr88464-5.c: Ditto.
-	* gcc.target/i386/avx512vl-pr88464-1.c: Ditto.
-	* gcc.target/i386/avx512vl-pr88464-11.c: Ditto.
-	* gcc.target/i386/avx512vl-pr88464-3.c: Ditto.
-	* gcc.target/i386/avx512vl-pr88464-9.c: Ditto.
-	* gcc.target/i386/pr88531-1b.c: Ditto.
-	* gcc.target/i386/pr88531-1c.c: Ditto.
-
-(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d)
----
- gcc/config/i386/i386-options.cc                     | 5 +++++
- gcc/config/i386/x86-tune.def                        | 9 ++++++---
- gcc/testsuite/gcc.target/i386/avx2-gather-2.c       | 2 +-
- gcc/testsuite/gcc.target/i386/avx2-gather-6.c       | 2 +-
- gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c   | 2 +-
- gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c   | 2 +-
- gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c  | 2 +-
- gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +-
- gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c  | 2 +-
- gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c  | 2 +-
- gcc/testsuite/gcc.target/i386/pr88531-1b.c          | 2 +-
- gcc/testsuite/gcc.target/i386/pr88531-1c.c          | 2 +-
- 12 files changed, 21 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index fb2ed942f..9617fc162 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3.  If not see
- #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
- #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
- #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
-+/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828.
-+   Software mitigation.  */
-+#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \
-+	       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
-+	       | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
- 
- #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
- #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
-diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
-index e6b9e2125..4392709fc 100644
---- a/gcc/config/i386/x86-tune.def
-+++ b/gcc/config/i386/x86-tune.def
-@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
- /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
-    elements.  */
- DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
--	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
-+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
-+	    | m_GENERIC | m_GDS))
- 
- /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
-    elements.  */
-@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
- /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
-    elements.  */
- DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
--	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 |  m_ALDERLAKE | m_GENERIC))
-+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
-+	    | m_GENERIC | m_GDS))
- 
- /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
-    elements.  */
-@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
- /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
-    elements.  */
- DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
--	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
-+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
-+	    | m_GENERIC | m_GDS))
- 
- /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
-    elements.  */
-diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
-index ad5ef7310..978924b0f 100644
---- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
-+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
-+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */
- 
- #include "avx2-gather-1.c"
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
-index b9119581a..067b251e3 100644
---- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
-+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */
-+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details  -mtune=haswell" } */
- 
- #include "avx2-gather-5.c"
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
-index 06d21bb01..d1a229861 100644
---- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
-index 462e951fd..d7b0b2b28 100644
---- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
-index 55a28dddb..07439185e 100644
---- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
-index 969600885..3a9810827 100644
---- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
-index 6b0c8a859..ac669e048 100644
---- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
-index 3af568ab3..14a1083b6 100644
---- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
-@@ -1,6 +1,6 @@
- /* PR tree-optimization/88464 */
- /* { dg-do compile } */
--/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
-+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
-index 812c8a10f..e6df789de 100644
---- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c
-+++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c

_service:tar_scm:0067-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch Deleted

@@ -1,187 +0,0 @@
-From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Thu, 10 Aug 2023 16:26:13 +0800
-Subject: [PATCH 12/32] Support -mno-gather -mno-scatter to enable/disable
- vectorization for all gather/scatter instructions
-
-Rename original use_gather to use_gather_8parts, Support
--mtune-ctrl={,^}use_gather to set/clear tune features
-use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
-as alias of -mtune-ctrl=, use_gather, ^use_gather.
-
-Similar for use_scatter.
-
-gcc/ChangeLog:
-
-	* config/i386/i386-builtins.cc
-	(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
-	* config/i386/i386-options.cc (parse_mtune_ctrl_str):
-	Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
-	8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
-	* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
-	for use_scatter_8parts
-	* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
-	(TARGET_USE_GATHER_8PARTS): .. this.
-	(TARGET_USE_SCATTER): Rename to ..
-	(TARGET_USE_SCATTER_8PARTS): .. this.
-	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
-	(X86_TUNE_USE_GATHER_8PARTS): .. this.
-	(X86_TUNE_USE_SCATTER): Rename to
-	(X86_TUNE_USE_SCATTER_8PARTS): .. this.
-	* config/i386/i386.opt: Add new options mgather, mscatter.
-
-(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
----
- gcc/config/i386/i386-builtins.cc |  2 +-
- gcc/config/i386/i386-options.cc  | 54 +++++++++++++++++++++++---------
- gcc/config/i386/i386.cc          |  2 +-
- gcc/config/i386/i386.h           |  8 ++---
- gcc/config/i386/i386.opt         |  4 +++
- gcc/config/i386/x86-tune.def     |  4 +--
- 6 files changed, 52 insertions(+), 22 deletions(-)
-
-diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
-index 050c6228a..8ed32e14f 100644
---- a/gcc/config/i386/i386-builtins.cc
-+++ b/gcc/config/i386/i386-builtins.cc
-@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
- 	  ? !TARGET_USE_GATHER_2PARTS
- 	  : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
- 	     ? !TARGET_USE_GATHER_4PARTS
--	     : !TARGET_USE_GATHER)))
-+	     : !TARGET_USE_GATHER_8PARTS)))
-     return NULL_TREE;
- 
-   if ((TREE_CODE (index_type) != INTEGER_TYPE
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index 9617fc162..3df1f0c41 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
-           curr_feature_string++;
-           clear = true;
-         }
--      for (i = 0; i < X86_TUNE_LAST; i++)
--        {
--          if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
--            {
--              ix86_tune_features[i] = !clear;
--              if (dump)
--                fprintf (stderr, "Explicitly %s feature %s\n",
--                         clear ? "clear" : "set", ix86_tune_feature_names[i]);
--              break;
--            }
--        }
--      if (i == X86_TUNE_LAST)
--	error ("unknown parameter to option %<-mtune-ctrl%>: %s",
--	       clear ? curr_feature_string - 1 : curr_feature_string);
-+
-+      if (!strcmp (curr_feature_string, "use_gather"))
-+	{
-+	  ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
-+	  ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
-+	  ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
-+	  if (dump)
-+	    fprintf (stderr, "Explicitly %s features use_gather_2parts,"
-+		     " use_gather_4parts, use_gather_8parts\n",
-+		     clear ? "clear" : "set");
-+
-+	}
-+      else if (!strcmp (curr_feature_string, "use_scatter"))
-+	{
-+	  ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
-+	  ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
-+	  ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
-+	  if (dump)
-+	    fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
-+		     " use_scatter_4parts, use_scatter_8parts\n",
-+		     clear ? "clear" : "set");
-+	}
-+      else
-+	{
-+	  for (i = 0; i < X86_TUNE_LAST; i++)
-+	    {
-+	      if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
-+		{
-+		  ix86_tune_features[i] = !clear;
-+		  if (dump)
-+		    fprintf (stderr, "Explicitly %s feature %s\n",
-+			     clear ? "clear" : "set", ix86_tune_feature_names[i]);
-+		  break;
-+		}
-+	    }
-+
-+	  if (i == X86_TUNE_LAST)
-+	    error ("unknown parameter to option %<-mtune-ctrl%>: %s",
-+		   clear ? curr_feature_string - 1 : curr_feature_string);
-+	}
-       curr_feature_string = next_feature_string;
-     }
-   while (curr_feature_string);
-diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
-index 479fc6010..e75d37023 100644
---- a/gcc/config/i386/i386.cc
-+++ b/gcc/config/i386/i386.cc
-@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
-       ? !TARGET_USE_SCATTER_2PARTS
-       : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
- 	 ? !TARGET_USE_SCATTER_4PARTS
--	 : !TARGET_USE_SCATTER))
-+	 : !TARGET_USE_SCATTER_8PARTS))
-     return NULL_TREE;
- 
-   if ((TREE_CODE (index_type) != INTEGER_TYPE
-diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
-index 688aaabd3..aaa136ba0 100644
---- a/gcc/config/i386/i386.h
-+++ b/gcc/config/i386/i386.h
-@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
- 	ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
- #define TARGET_USE_SCATTER_4PARTS \
- 	ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
--#define TARGET_USE_GATHER \
--	ix86_tune_features[X86_TUNE_USE_GATHER]
--#define TARGET_USE_SCATTER \
--	ix86_tune_features[X86_TUNE_USE_SCATTER]
-+#define TARGET_USE_GATHER_8PARTS \
-+	ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
-+#define TARGET_USE_SCATTER_8PARTS \
-+	ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
- #define TARGET_FUSE_CMP_AND_BRANCH_32 \
- 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
- #define TARGET_FUSE_CMP_AND_BRANCH_64 \
-diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
-index 498fb454d..b154110d8 100644
---- a/gcc/config/i386/i386.opt
-+++ b/gcc/config/i386/i386.opt
-@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
- munroll-only-small-loops
- Target Var(ix86_unroll_only_small_loops) Init(0) Save
- Enable conservative small loop unrolling.
-+
-+mscatter
-+Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
-+Enable vectorization for scatter instruction.
-diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
-index 4392709fc..bdb455d20 100644
---- a/gcc/config/i386/x86-tune.def
-+++ b/gcc/config/i386/x86-tune.def
-@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
- 
- /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
-    elements.  */
--DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
-+DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
- 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
- 	    | m_GENERIC | m_GDS))
- 
- /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
-    elements.  */
--DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
-+DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
- 	  ~(m_ZNVER4))
- 
- /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
--- 
-2.28.0.windows.1
-

_service:tar_scm:0068-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch Deleted

@@ -1,129 +0,0 @@
-From 764518a35e90a3e13c469275da9c3c7002fe1982 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Fri, 8 Sep 2023 09:22:43 +0800
-Subject: [PATCH 13/32] Remove constraint modifier % for
- fcmaddcph/fmaddcph/fcmulcph since they're not commutative.
-
-gcc/ChangeLog:
-
-	PR target/111306
-	PR target/111335
-	* config/i386/sse.md (int_comm): New int_attr.
-	(fma_<complexopname>_<mode><sdc_maskz_name><round_name>):
-	Remove % for Complex conjugate operations since they're not
-	commutative.
-	(fma_<complexpairopname>_<mode>_pair): Ditto.
-	(<avx512>_<complexopname>_<mode>_mask<round_name>): Ditto.
-	(cmul<conj_op><mode>3): Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/pr111306.c: New test.
-
-(cherry picked from commit f197392a16ffb1327f1d12ff8ff05f9295e015cb)
----
- gcc/config/i386/sse.md                   | 16 ++++++++---
- gcc/testsuite/gcc.target/i386/pr111306.c | 36 ++++++++++++++++++++++++
- 2 files changed, 48 insertions(+), 4 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c
-
-diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
-index 3af159896..f25dd5f2b 100644
---- a/gcc/config/i386/sse.md
-+++ b/gcc/config/i386/sse.md
-@@ -6318,6 +6318,14 @@
- 	(UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
- 	 (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc"))
- 
-+(define_int_attr int_comm
-+	[(UNSPEC_COMPLEX_FMA "")
-+	 (UNSPEC_COMPLEX_FMA_PAIR "")
-+	 (UNSPEC_COMPLEX_FCMA "")
-+	 (UNSPEC_COMPLEX_FCMA_PAIR "")
-+	 (UNSPEC_COMPLEX_FMUL "%")
-+	 (UNSPEC_COMPLEX_FCMUL "")])
-+
- (define_int_attr conj_op
- 	(UNSPEC_COMPLEX_FMA "")
- 	 (UNSPEC_COMPLEX_FCMA "_conj")
-@@ -6431,7 +6439,7 @@
- (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
-   (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
- 	(unspec:VF_AVX512FP16VL
--	  (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v")
-+	  (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "<int_comm>v")
- 	   (match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>")
- 	   (match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0")
- 	   UNSPEC_COMPLEX_F_C_MA))
-@@ -6495,7 +6503,7 @@
- (define_insn "fma_<complexpairopname>_<mode>_pair"
-  (set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v")
-        (unspec:VF1_AVX512VL
--	 (match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
-+	 (match_operand:VF1_AVX512VL 1 "vector_operand" "<int_comm>v")
- 	  (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
- 	  (match_operand:VF1_AVX512VL 3 "vector_operand" "0")
- 	  UNSPEC_COMPLEX_F_C_MA_PAIR))
-@@ -6562,7 +6570,7 @@
-   (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
- 	(vec_merge:VF_AVX512FP16VL
- 	  (unspec:VF_AVX512FP16VL
--	    (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
-+	    (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
- 	     (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")
- 	     (match_operand:VF_AVX512FP16VL 3 "register_operand" "0")
- 	     UNSPEC_COMPLEX_F_C_MA)
-@@ -6586,7 +6594,7 @@
- (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
-   (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
- 	  (unspec:VF_AVX512FP16VL
--	    (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
-+	    (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
- 	     (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")
- 	     UNSPEC_COMPLEX_F_C_MUL))
-   "TARGET_AVX512FP16 && <round_mode512bit_condition>"
-diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c b/gcc/testsuite/gcc.target/i386/pr111306.c
-new file mode 100644
-index 000000000..541725ebd
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr111306.c
-@@ -0,0 +1,36 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
-+/* { dg-require-effective-target avx512fp16 } */
-+
-+#define AVX512FP16
-+#include "avx512f-helper.h"
-+
-+__attribute__((optimize("O2"),noipa))
-+void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
-+  __m512h rA = _mm512_loadu_ph(a);
-+  for (int i = 0; i < n; i += 32) {
-+    __m512h rB = _mm512_loadu_ph(b + i);
-+    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
-+  }
-+}
-+
-+void
-+test_512 (void)
-+{
-+  int n = 32;
-+  _Float16 an, bn, cn;
-+  _Float16 expn;
-+  for (int i = 1; i <= n; i++) {
-+    ai - 1 = i & 1 ? -i : i;
-+    bi - 1 = i;
-+  }
-+
-+  func1(a, b, n, c);
-+  for (int i = 0; i < n / 32; i += 2) {
-+    if (ci != ai * bi + ai+1 * bi+1
-+	|| ci+1 != ai * bi+1 - ai+1*bi)
-+      __builtin_abort ();
-+    }
-+}
-+
-+
--- 
-2.28.0.windows.1
-

_service:tar_scm:0069-Disparage-slightly-for-the-alternative-which-move-DF.patch Deleted

@@ -1,106 +0,0 @@
-From afd539adfe762adb57863299a11987b7e20e7987 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Wed, 5 Jul 2023 13:45:11 +0800
-Subject: [PATCH 14/32] Disparage slightly for the alternative which move
- DFmode between SSE_REGS and GENERAL_REGS.
-
-For testcase
-
-void __cond_swap(double* __x, double* __y) {
-  bool __r = (*__x < *__y);
-  auto __tmp = __r ? *__x : *__y;
-  *__y = __r ? *__y : *__x;
-  *__x = __tmp;
-}
-
-GCC-14 with -O2 and -march=x86-64 options generates the following code:
-
-__cond_swap(double*, double*):
-        movsd   xmm1, QWORD PTR [rdi]
-        movsd   xmm0, QWORD PTR [rsi]
-        comisd  xmm0, xmm1
-        jbe     .L2
-        movq    rax, xmm1
-        movapd  xmm1, xmm0
-        movq    xmm0, rax
-.L2:
-        movsd   QWORD PTR [rsi], xmm1
-        movsd   QWORD PTR [rdi], xmm0
-        ret
-
-rax is used to save and restore DFmode value. In RA both GENERAL_REGS
-and SSE_REGS cost zero since we didn't disparage the
-alternative in movdf_internal pattern, according to register
-allocation order, GENERAL_REGS is allocated. The patch add ? for
-alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal
-pattern, after that we get optimal RA.
-
-__cond_swap:
-.LFB0:
-	.cfi_startproc
-	movsd	(%rdi), %xmm1
-	movsd	(%rsi), %xmm0
-	comisd	%xmm1, %xmm0
-	jbe	.L2
-	movapd	%xmm1, %xmm2
-	movapd	%xmm0, %xmm1
-	movapd	%xmm2, %xmm0
-.L2:
-	movsd	%xmm1, (%rsi)
-	movsd	%xmm0, (%rdi)
-	ret
-
-gcc/ChangeLog:
-
-	PR target/110170
-	* config/i386/i386.md (movdf_internal): Disparage slightly for
-	2 alternatives (r,v) and (v,r) by adding constraint modifier
-	'?'.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/pr110170-3.c: New test.
-
-(cherry picked from commit 37a231cc7594d12ba0822077018aad751a6fb94e)
----
- gcc/config/i386/i386.md                    |  4 ++--
- gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
- 2 files changed, 13 insertions(+), 2 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
-
-diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
-index be07be10d..71691f598 100644
---- a/gcc/config/i386/i386.md
-+++ b/gcc/config/i386/i386.md
-@@ -3582,9 +3582,9 @@
- ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
- (define_insn "*movdf_internal"
-   [(set (match_operand:DF 0 "nonimmediate_operand"
--    "=Yf*f,m   ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r  ,o ,r  ,m")
-+    "=Yf*f,m   ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r  ,o ,r  ,m")
- 	(match_operand:DF 1 "general_operand"
--    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))
-+    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))
-   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
-    && (lra_in_progress || reload_completed
-        || !CONST_DOUBLE_P (operands[1])
-diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
-new file mode 100644
-index 000000000..70daa89e9
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile { target { ! ia32 } } } */
-+/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
-+/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
-+
-+void __cond_swap(double* __x, double* __y) {
-+  _Bool __r = (*__x < *__y);
-+  double __tmp = __r ? *__x : *__y;
-+  *__y = __r ? *__y : *__x;
-+  *__x = __tmp;
-+}
-+
--- 
-2.28.0.windows.1
-

_service:tar_scm:0070-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch Deleted

@@ -1,163 +0,0 @@
-From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Thu, 9 Nov 2023 13:20:05 +0800
-Subject: [PATCH 15/32] Fix wrong code due to vec_merge + pcmp to blendvb
- splitter.
-
-gcc/ChangeLog:
-
-	PR target/112443
-	* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
-	from LT to GT since there's not in the pattern.
-	(*avx2_pcmp<mode>3_5): Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.target/i386/pr112443.C: New test.
-
-(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
----
- gcc/config/i386/sse.md                   |   4 +-
- gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
- 2 files changed, 110 insertions(+), 2 deletions(-)
- create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
-
-diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
-index f25dd5f2b..23b858ab2 100644
---- a/gcc/config/i386/sse.md
-+++ b/gcc/config/i386/sse.md
-@@ -16358,7 +16358,7 @@
- 	     (match_dup 4))
- 	     UNSPEC_BLENDV))
- {
--  if (INTVAL (operands[5]) == 1)
-+  if (INTVAL (operands[5]) == 5)
-     std::swap (operands[1], operands[2]);
-   operands[3] = gen_lowpart (<MODE>mode, operands[3]);
- })
-@@ -16388,7 +16388,7 @@
- 	     (match_dup 4))
- 	     UNSPEC_BLENDV))
- {
--  if (INTVAL (operands[5]) == 1)
-+  if (INTVAL (operands[5]) == 5)
-     std::swap (operands[1], operands[2]);
- })
- 
-diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
-new file mode 100644
-index 000000000..ebfa9b4a7
---- /dev/null
-+++ b/gcc/testsuite/g++.target/i386/pr112443.C
-@@ -0,0 +1,108 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target avx512bw } */
-+/* { dg-require-effective-target avx512vl } */
-+/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
-+
-+#include <cstdint>
-+#include <x86intrin.h>
-+#include <functional>
-+#include <ostream>
-+
-+#define AVX512BW
-+#define AVX512VL
-+
-+#include "avx512f-helper.h"
-+
-+struct TensorIteratorBase{
-+  char* in;
-+  char* out;
-+
-+  void for_each(std::function<void(char*, char*, int64_t size)> loop){
-+    loop(out, in, 32);
-+  }    
-+};
-+
-+class Vectorized {
-+protected:
-+  __m256i values;
-+
-+  static inline __m256i invert(const __m256i& v) {
-+    const auto ones = _mm256_set1_epi64x(-1);
-+    return _mm256_xor_si256(ones, v);
-+  }
-+public:
-+  operator __m256i() const {
-+    return values;
-+  }
-+
-+  static constexpr int size() {
-+    return 32;
-+  }
-+
-+  Vectorized() {}
-+  Vectorized(__m256i v) : values(v) {}
-+  Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
-+  static Vectorized blendv(const Vectorized& a, const Vectorized& b,
-+			   const Vectorized& mask) {
-+    return _mm256_blendv_epi8(a, b, mask);
-+  }
-+  static Vectorized loadu(const void* ptr) {
-+    return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
-+  }
-+  void store(void* ptr) const {
-+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
-+  }
-+
-+  Vectorized operator<(const Vectorized& other) const {
-+    __m256i max = _mm256_max_epu8(values, other);
-+    return invert(_mm256_cmpeq_epi8(max, values));
-+  }
-+  Vectorized operator-(const Vectorized& b) {
-+    return _mm256_sub_epi8(values, b);
-+  }
-+};
-+
-+std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
-+  uint8_t buf[Vectorized::size()];
-+  vec.store(buf);
-+  stream << "vec[";
-+  for (int i = 0; i != Vectorized::size(); i++) {
-+    if (i != 0)
-+      stream << ", ";
-+    stream << buf[i]*1;
-+  }
-+  stream << "]";
-+  return stream;
-+}
-+
-+void run(TensorIteratorBase iter){
-+  Vectorized zero_vec(0);
-+  Vectorized one_vec(1);
-+
-+  iter.for_each([=](char* out, char* in, int64_t size) {
-+    for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
-+      auto self_vec = Vectorized::loadu(in + i);
-+      auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
-+      auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
-+      auto outv = left - right;
-+      outv.store(out + i);
-+    }
-+  });
-+}
-+
-+void
-+test_256 (){
-+  char in[32];
-+  char out[32];
-+  for(auto& x: in) x = 1;
-+  run(TensorIteratorBase{in, out});
-+  Vectorized::loadu (out);
-+  for (int i = 0; i != 32; i++)
-+    if (out[i] != 1)
-+      __builtin_abort ();
-+}
-+
-+void
-+test_128 ()
-+{
-+}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0071-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch Deleted

@@ -1,151 +0,0 @@
-From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Thu, 7 Dec 2023 09:17:27 +0800
-Subject: [PATCH 16/32] Don't assume it's AVX_U128_CLEAN after call_insn whose
- abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS.
-
-If the function doesn't clobber any SSE registers or only clobbers the
-128-bit part, then vzeroupper isn't issued before the function exit,
-so the status is not CLEAN but ANY after the function.
-
-Also for sibling_call, it's safe to issue a vzeroupper. Also there
-could be a missing vzeroupper since there's no mode_exit for
-sibling_call_p.
-
-gcc/ChangeLog:
-
-	PR target/112891
-	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
-	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
-	align with ix86_avx_u128_mode_needed.
-	(ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for
-	sibling_call.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/i386/pr112891.c: New test.
-	* gcc.target/i386/pr112891-2.c: New test.
-
-(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
----
- gcc/config/i386/i386.cc                    | 22 +++++++++++++---
- gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
- gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++
- 3 files changed, 78 insertions(+), 3 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
- create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
-
-diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
-index e75d37023..60f3296b0 100644
---- a/gcc/config/i386/i386.cc
-+++ b/gcc/config/i386/i386.cc
-@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
- 	 modes wider than 256 bits.  It's only safe to issue a
- 	 vzeroupper if all SSE registers are clobbered.  */
-       const function_abi &abi = insn_callee_abi (insn);
--      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
--				  abi.mode_clobbers (V4DImode)))
-+      /* Should be safe to issue an vzeroupper before sibling_call_p.
-+	 Also there not mode_exit for sibling_call, so there could be
-+	 missing vzeroupper for that.  */
-+      if (!(SIBLING_CALL_P (insn)
-+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
-+				      abi.mode_clobbers (V4DImode))))
- 	return AVX_U128_ANY;
- 
-       return AVX_U128_CLEAN;
-@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
-       bool avx_upper_reg_found = false;
-       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
- 
--      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
-+      if (avx_upper_reg_found)
-+	return AVX_U128_DIRTY;
-+
-+      /* If the function desn't clobber any sse registers or only clobber
-+	 128-bit part, Then vzeroupper isn't issued before the function exit.
-+	 the status not CLEAN but ANY after the function.  */
-+      const function_abi &abi = insn_callee_abi (insn);
-+      if (!(SIBLING_CALL_P (insn)
-+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
-+				      abi.mode_clobbers (V4DImode))))
-+	return AVX_U128_ANY;
-+
-+      return  AVX_U128_CLEAN;
-     }
- 
-   /* Otherwise, return current mode.  Remember that if insn
-diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
-new file mode 100644
-index 000000000..164c3985d
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
-@@ -0,0 +1,30 @@
-+/* { dg-do compile } */
-+/* { dg-options "-mavx2 -O3" } */
-+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
-+
-+void
-+__attribute__((noinline))
-+bar (double* a)
-+{
-+  a[0] = 1.0;
-+  a[1] = 2.0;
-+}
-+
-+double
-+__attribute__((noinline))
-+foo (double* __restrict a, double* b)
-+{
-+  a[0] += b[0];
-+  a[1] += b[1];
-+  a[2] += b[2];
-+  a[3] += b[3];
-+  bar (b);
-+  return a[5] + b[5];
-+}
-+
-+double
-+foo1 (double* __restrict a, double* b)
-+{
-+  double c = foo (a, b);
-+  return __builtin_exp (c);
-+}
-diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
-new file mode 100644
-index 000000000..dbf6c6794
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
-@@ -0,0 +1,29 @@
-+/* { dg-do compile } */
-+/* { dg-options "-mavx2 -O3" } */
-+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
-+
-+void
-+__attribute__((noinline))
-+bar (double* a)
-+{
-+  a[0] = 1.0;
-+  a[1] = 2.0;
-+}
-+
-+void
-+__attribute__((noinline))
-+foo (double* __restrict a, double* b)
-+{
-+  a[0] += b[0];
-+  a[1] += b[1];
-+  a[2] += b[2];
-+  a[3] += b[3];
-+  bar (b);
-+}
-+
-+double
-+foo1 (double* __restrict a, double* b)
-+{
-+  foo (a, b);
-+  return __builtin_exp (b[1]);
-+}
--- 
-2.28.0.windows.1
-

_service:tar_scm:0072-Disable-FMADD-in-chains-for-Zen4-and-generic.patch Deleted

@@ -1,142 +0,0 @@
-From 19ee37b11702c86d7ed271e9e1d00e23cc4ab93c Mon Sep 17 00:00:00 2001
-From: Jan Hubicka <jh@suse.cz>
-Date: Fri, 29 Dec 2023 23:51:03 +0100
-Subject: [PATCH 17/32] Disable FMADD in chains for Zen4 and generic
-
-This patch disables use of FMA in matrix multiplication loop for generic (for
-x86-64-v3) and zen4.  I tested this on zen4 and Xeon Gold 6212U.
-
-For Intel this is neutral both on the matrix multiplication microbenchmark
-(attached) and spec2k17 where the difference was within noise for Core.
-
-On core the micro-benchmark runs as follows:
-
-With FMA:
-
-       578,500,241      cycles:u                         #    3.645 GHz
-                ( +-  0.12% )
-       753,318,477      instructions:u                   #    1.30  insn per
-cycle              ( +-  0.00% )
-       125,417,701      branches:u                       #  790.227 M/sec
-                ( +-  0.00% )
-          0.159146 +- 0.000363 seconds time elapsed  ( +-  0.23% )
-
-No FMA:
-
-       577,573,960      cycles:u                         #    3.514 GHz
-                ( +-  0.15% )
-       878,318,479      instructions:u                   #    1.52  insn per
-cycle              ( +-  0.00% )
-       125,417,702      branches:u                       #  763.035 M/sec
-                ( +-  0.00% )
-          0.164734 +- 0.000321 seconds time elapsed  ( +-  0.19% )
-
-So the cycle count is unchanged and discrete multiply+add takes same time as
-FMA.
-
-While on zen:
-
-With FMA:
-         484875179      cycles:u                         #    3.599 GHz
-             ( +-  0.05% )  (82.11%)
-         752031517      instructions:u                   #    1.55  insn per
-cycle
-         125106525      branches:u                       #  928.712 M/sec
-             ( +-  0.03% )  (85.09%)
-            128356      branch-misses:u                  #    0.10% of all
-branches          ( +-  0.06% )  (83.58%)
-
-No FMA:
-         375875209      cycles:u                         #    3.592 GHz
-             ( +-  0.08% )  (80.74%)
-         875725341      instructions:u                   #    2.33  insn per
-cycle
-         124903825      branches:u                       #    1.194 G/sec
-             ( +-  0.04% )  (84.59%)
-          0.105203 +- 0.000188 seconds time elapsed  ( +-  0.18% )
-
-The difference is that Cores understand the fact that fmadd does not need
-all three parameters to start computation, while Zen cores don't.
-
-Since this seems noticeable win on zen and not loss on Core it seems like good
-default for generic.
-
-float a[SIZE][SIZE];
-float b[SIZE][SIZE];
-float c[SIZE][SIZE];
-
-void init(void)
-{
-   int i, j, k;
-   for(i=0; i<SIZE; ++i)
-   {
-      for(j=0; j<SIZE; ++j)
-      {
-         a[i][j] = (float)i + j;
-         b[i][j] = (float)i - j;
-         c[i][j] = 0.0f;
-      }
-   }
-}
-
-void mult(void)
-{
-   int i, j, k;
-
-   for(i=0; i<SIZE; ++i)
-   {
-      for(j=0; j<SIZE; ++j)
-      {
-         for(k=0; k<SIZE; ++k)
-         {
-            c[i][j] += a[i][k] * b[k][j];
-         }
-      }
-   }
-}
-
-int main(void)
-{
-   clock_t s, e;
-
-   init();
-   s=clock();
-   mult();
-   e=clock();
-   printf("        mult took %10d clocks\n", (int)(e-s));
-
-   return 0;
-
-}
-
-gcc/ChangeLog:
-
-	* config/i386/x86-tune.def (X86_TUNE_AVOID_128FMA_CHAINS,
-	X86_TUNE_AVOID_256FMA_CHAINS): Enable for znver4 and Core.
----
- gcc/config/i386/x86-tune.def | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
-index bdb455d20..fd095f3ec 100644
---- a/gcc/config/i386/x86-tune.def
-+++ b/gcc/config/i386/x86-tune.def
-@@ -499,12 +499,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
- 
- /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
-    smaller FMA chain.  */
--DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
-+DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2
-+	  | m_ZNVER3 | m_ZNVER4 | m_GENERIC)
- 
- /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
-    smaller FMA chain.  */
- DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3
--	  | m_ALDERLAKE | m_SAPPHIRERAPIDS)
-+	  | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_GENERIC)
- 
- /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
-    smaller FMA chain.  */
--- 
-2.28.0.windows.1
-

_service:tar_scm:0073-Initial-Raptorlake-Support.patch Deleted

@@ -1,47 +0,0 @@
-From 411d1f0bcc0d1c8018fdf5fe84ad2404929556ec Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Fri, 16 Sep 2022 13:59:01 +0800
-Subject: [PATCH 18/32] Initial Raptorlake Support
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h:
-	(get_intel_cpu): Handle Raptorlake.
-	* common/config/i386/i386-common.cc:
-	(processor_alias_table): Add Raptorlake.
-
-(cherry picked from commit 470a0659b508d684148f362c4dc0eccf5a83a23e)
----
- gcc/common/config/i386/cpuinfo.h      | 2 ++
- gcc/common/config/i386/i386-common.cc | 2 ++
- 2 files changed, 4 insertions(+)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 316ad3cb3..13d0f4cd8 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -508,6 +508,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
-     case 0x97:
-     case 0x9a:
-       /* Alder Lake.  */
-+    case 0xb7:
-+      /* Raptor Lake.  */
-       cpu = "alderlake";
-       CHECK___builtin_cpu_is ("corei7");
-       CHECK___builtin_cpu_is ("alderlake");
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index f650e255f..c1d700f89 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -1939,6 +1939,8 @@ const pta processor_alias_table[] =
-     M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
-   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-+  {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-+    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
-   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
--- 
-2.28.0.windows.1
-

_service:tar_scm:0074-Initial-Meteorlake-Support.patch Deleted

@@ -1,49 +0,0 @@
-From 87cea29ede520f4a5af01dff7071ab1d23bd47b5 Mon Sep 17 00:00:00 2001
-From: "Hu, Lin1" <lin1.hu@intel.com>
-Date: Fri, 16 Sep 2022 11:25:13 +0800
-Subject: [PATCH 19/32] Initial Meteorlake Support
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h:
-	(get_intel_cpu): Handle Meteorlake.
-	* common/config/i386/i386-common.cc:
-	(processor_alias_table): Add Meteorlake.
-
-(cherry picked from commit fd206f0e95fb6f41b96eaaaab1dc0c30378e5e08)
----
- gcc/common/config/i386/cpuinfo.h      | 4 ++++
- gcc/common/config/i386/i386-common.cc | 2 ++
- 2 files changed, 6 insertions(+)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 13d0f4cd8..37af92d6b 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -510,6 +510,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       /* Alder Lake.  */
-     case 0xb7:
-       /* Raptor Lake.  */
-+    case 0xb5:
-+    case 0xaa:
-+    case 0xac:
-+      /* Meteor Lake.  */
-       cpu = "alderlake";
-       CHECK___builtin_cpu_is ("corei7");
-       CHECK___builtin_cpu_is ("alderlake");
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index c1d700f89..cfee672fb 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -1941,6 +1941,8 @@ const pta processor_alias_table[] =
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-+  {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-+    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
-   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
--- 
-2.28.0.windows.1
-

_service:tar_scm:0075-Support-Intel-AMX-FP16-ISA.patch Deleted

@@ -1,691 +0,0 @@
-From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001
-From: Hongyu Wang <hongyu.wang@intel.com>
-Date: Fri, 4 Nov 2022 15:50:55 +0800
-Subject: [PATCH 20/32] Support Intel AMX-FP16 ISA
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h (get_available_features): Detect
-	amx-fp16.
-	* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
-	OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
-	(ix86_handle_option): Handle -mamx-fp16.
-	* common/config/i386/i386-cpuinfo.h (enum processor_features):
-	Add FEATURE_AMX_FP16.
-	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
-	amx-fp16.
-	* config.gcc: Add amxfp16intrin.h.
-	* config/i386/cpuid.h (bit_AMX_FP16): New.
-	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
-	__AMX_FP16__.
-	* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
-	* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
-	(ix86_valid_target_attribute_inner_p): Add new ATTR.
-	(ix86_option_override_internal): Handle AMX-FP16.
-	* config/i386/i386.opt: Add -mamx-fp16.
-	* config/i386/immintrin.h: Include amxfp16intrin.h.
-	* doc/extend.texi: Document -mamx-fp16.
-	* doc/invoke.texi: Document amx-fp16.
-	* doc/sourcebuild.texi: Document amx_fp16.
-	* config/i386/amxfp16intrin.h: New file.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.dg/other/i386-2.C: Add -mamx-fp16.
-	* g++.dg/other/i386-3.C: Ditto.
-	* gcc.target/i386/sse-12.c: Ditto.
-	* gcc.target/i386/sse-13.c: Ditto.
-	* gcc.target/i386/sse-14.c: Ditto.
-	* gcc.target/i386/sse-22.c: Ditto.
-	* gcc.target/i386/sse-23.c: Ditto.
-	* lib/target-supports.exp: (check_effective_target_amx_fp16):
-	New proc.
-	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
-	* gcc.target/i386/amx-check.h: Add AMX_FP16.
-	* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
-	* gcc.target/i386/amxfp16-asmatt-1.c: New test.
-	* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
-	* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
-
-Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
-
-(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226)
----
- gcc/common/config/i386/cpuinfo.h              |  5 ++
- gcc/common/config/i386/i386-common.cc         | 15 +++++
- gcc/common/config/i386/i386-cpuinfo.h         |  1 +
- gcc/common/config/i386/i386-isas.h            |  1 +
- gcc/config.gcc                                |  3 +-
- gcc/config/i386/amxfp16intrin.h               | 46 ++++++++++++++
- gcc/config/i386/cpuid.h                       |  1 +
- gcc/config/i386/i386-c.cc                     |  2 +
- gcc/config/i386/i386-isa.def                  |  1 +
- gcc/config/i386/i386-options.cc               |  4 +-
- gcc/config/i386/i386.opt                      |  4 ++
- gcc/config/i386/immintrin.h                   |  2 +
- gcc/doc/extend.texi                           |  5 ++
- gcc/doc/invoke.texi                           |  9 ++-
- gcc/doc/sourcebuild.texi                      |  3 +
- gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
- gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
- gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
- gcc/testsuite/gcc.target/i386/amx-helper.h    | 61 +++++++++++++++++++
- .../gcc.target/i386/amxfp16-asmatt-1.c        | 13 ++++
- .../gcc.target/i386/amxfp16-asmintel-1.c      | 10 +++
- .../gcc.target/i386/amxfp16-dpfp16ps-2.c      | 57 +++++++++++++++++
- gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
- gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
- gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
- gcc/testsuite/lib/target-supports.exp         | 11 ++++
- 29 files changed, 262 insertions(+), 13 deletions(-)
- create mode 100644 gcc/config/i386/amxfp16intrin.h
- create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
- create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
- create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
- create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 37af92d6b..5951a30aa 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model,
- 		set_feature (FEATURE_AVX512BF16);
- 	    }
- 	}
-+      if (amx_usable)
-+	{
-+	  if (eax & bit_AMX_FP16)
-+	    set_feature (FEATURE_AMX_FP16);
-+	}
-     }
- 
-   /* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index cfee672fb..922db33ee 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
- #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
- #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
-+#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
- 
- /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
-    as -msse4.2.  */
-@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_KL_UNSET \
-   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
- #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
-+#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
- 
- /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
-    as -mno-sse4.1. */
-@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
- 	}
-       return true;
- 
-+    case OPT_mamx_fp16:
-+      if (value)
-+	{
-+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
-+	}
-+      else
-+	{
-+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
-+	}
-+      return true;
-+
-     case OPT_mfma:
-       if (value)
- 	{
-diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
-index 82996ebb3..8f22897de 100644
---- a/gcc/common/config/i386/i386-cpuinfo.h
-+++ b/gcc/common/config/i386/i386-cpuinfo.h
-@@ -240,6 +240,7 @@ enum processor_features
-   FEATURE_X86_64_V2,
-   FEATURE_X86_64_V3,
-   FEATURE_X86_64_V4,
-+  FEATURE_AMX_FP16,
-   CPU_FEATURE_MAX
- };
- 
-diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
-index 2d0646a68..95bab6da2 100644
---- a/gcc/common/config/i386/i386-isas.h
-+++ b/gcc/common/config/i386/i386-isas.h
-@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
-   ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
-   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
-   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
-+  ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
- ISA_NAMES_TABLE_END
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index 4a0ae9328..e2b4a23dc 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*)
- 		       tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
- 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
- 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
--		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
-+		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
-+		       amxfp16intrin.h"
- 	;;
- ia64-*-*)
- 	extra_headers=ia64intrin.h
-diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h
-new file mode 100644
-index 000000000..6a114741a
---- /dev/null
-+++ b/gcc/config/i386/amxfp16intrin.h
-@@ -0,0 +1,46 @@
-+/* Copyright (C) 2020 Free Software Foundation, Inc.
-+
-+   This file is part of GCC.
-+
-+   GCC is free software; you can redistribute it and/or modify
-+   it under the terms of the GNU General Public License as published by
-+   the Free Software Foundation; either version 3, or (at your option)
-+   any later version.
-+
-+   GCC is distributed in the hope that it will be useful,
-+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

_service:tar_scm:0076-Support-Intel-prefetchit0-t1.patch Deleted

@@ -1,902 +0,0 @@
-From 42a38c8abaa28f67e26b9af3f434fe0107894e7d Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Fri, 4 Nov 2022 15:01:05 +0800
-Subject: [PATCH 21/32] Support Intel prefetchit0/t1
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h (get_available_features):
-	Detect PREFETCHI.
-	* common/config/i386/i386-common.cc
-	(OPTION_MASK_ISA2_PREFETCHI_SET,
-	OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
-	(ix86_handle_option): Handle -mprefetchi.
-	* common/config/i386/i386-cpuinfo.h
-	(enum processor_features): Add FEATURE_PREFETCHI.
-	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY
-	for prefetchi.
-	* config.gcc: Add prfchiintrin.h.
-	* config/i386/cpuid.h (bit_PREFETCHI): New.
-	* config/i386/i386-builtin-types.def:
-	Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
-	and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
-	* config/i386/i386-builtin.def (BDESC): Add new builtins.
-	* config/i386/i386-c.cc (ix86_target_macros_internal):
-	Define __PREFETCHI__.
-	* config/i386/i386-expand.cc: Handle new builtins.
-	* config/i386/i386-isa.def (PREFETCHI):
-	Add DEF_PTA(PREFETCHI).
-	* config/i386/i386-options.cc
-	(ix86_valid_target_attribute_inner_p): Handle prefetchi.
-	* config/i386/i386.md (prefetchi): New define_insn.
-	* config/i386/i386.opt: Add option -mprefetchi.
-	* config/i386/predicates.md (local_func_symbolic_operand):
-	New predicates.
-	* config/i386/x86gprintrin.h: Include prfchiintrin.h.
-	* config/i386/xmmintrin.h (enum _mm_hint): New enum for
-	prefetchi.
-	(_mm_prefetch): Handle the highest bit of enum.
-	* doc/extend.texi: Document prefetchi.
-	* doc/invoke.texi: Document -mprefetchi.
-	* doc/sourcebuild.texi: Document target prefetchi.
-	* config/i386/prfchiintrin.h: New file.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.dg/other/i386-2.C: Add -mprefetchi.
-	* g++.dg/other/i386-3.C: Ditto.
-	* gcc.target/i386/avx-1.c: Ditto.
-	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
-	* gcc.target/i386/sse-13.c: Add -mprefetchi.
-	* gcc.target/i386/sse-23.c: Ditto.
-	* gcc.target/i386/x86gprintrin-1.c: Ditto.
-	* gcc.target/i386/x86gprintrin-2.c: Ditto.
-	* gcc.target/i386/x86gprintrin-3.c: Ditto.
-	* gcc.target/i386/x86gprintrin-4.c: Ditto.
-	* gcc.target/i386/x86gprintrin-5.c: Ditto.
-	* gcc.target/i386/prefetchi-1.c: New test.
-	* gcc.target/i386/prefetchi-2.c: Ditto.
-	* gcc.target/i386/prefetchi-3.c: Ditto.
-	* gcc.target/i386/prefetchi-4.c: Ditto.
-
-Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
----
- gcc/common/config/i386/cpuinfo.h              |  2 +
- gcc/common/config/i386/i386-common.cc         | 15 ++++
- gcc/common/config/i386/i386-cpuinfo.h         |  1 +
- gcc/common/config/i386/i386-isas.h            |  1 +
- gcc/config.gcc                                |  2 +-
- gcc/config/i386/cpuid.h                       |  1 +
- gcc/config/i386/i386-builtin-types.def        |  4 +
- gcc/config/i386/i386-builtin.def              |  4 +
- gcc/config/i386/i386-c.cc                     |  2 +
- gcc/config/i386/i386-expand.cc                | 77 +++++++++++++++++++
- gcc/config/i386/i386-isa.def                  |  1 +
- gcc/config/i386/i386-options.cc               |  4 +-
- gcc/config/i386/i386.md                       | 23 ++++++
- gcc/config/i386/i386.opt                      |  4 +
- gcc/config/i386/predicates.md                 | 15 ++++
- gcc/config/i386/prfchiintrin.h                | 49 ++++++++++++
- gcc/config/i386/x86gprintrin.h                |  2 +
- gcc/config/i386/xmmintrin.h                   |  7 +-
- gcc/doc/extend.texi                           |  5 ++
- gcc/doc/invoke.texi                           |  7 +-
- gcc/doc/sourcebuild.texi                      |  3 +
- gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
- gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
- gcc/testsuite/gcc.target/i386/avx-1.c         |  4 +-
- gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
- gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 40 ++++++++++
- gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 +++++++
- gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 20 +++++
- gcc/testsuite/gcc.target/i386/prefetchi-4.c   | 19 +++++
- gcc/testsuite/gcc.target/i386/sse-13.c        |  4 +-
- gcc/testsuite/gcc.target/i386/sse-23.c        |  4 +-
- .../gcc.target/i386/x86gprintrin-1.c          |  2 +-
- .../gcc.target/i386/x86gprintrin-2.c          |  2 +-
- .../gcc.target/i386/x86gprintrin-3.c          |  2 +-
- .../gcc.target/i386/x86gprintrin-4.c          |  2 +-
- .../gcc.target/i386/x86gprintrin-5.c          |  2 +-
- 36 files changed, 343 insertions(+), 19 deletions(-)
- create mode 100644 gcc/config/i386/prfchiintrin.h
- create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1.c
- create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
- create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c
- create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-4.c
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 5951a30aa..f17e88144 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -772,6 +772,8 @@ get_available_features (struct __processor_model *cpu_model,
- 	  __cpuid_count (7, 1, eax, ebx, ecx, edx);
- 	  if (eax & bit_HRESET)
- 	    set_feature (FEATURE_HRESET);
-+	  if (edx & bit_PREFETCHI)
-+	    set_feature (FEATURE_PREFETCHI);
- 	  if (avx_usable)
- 	    {
- 	      if (eax & bit_AVXVNNI)
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index 922db33ee..c8cf532cf 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
- #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
- #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
-+#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
- 
- /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
-    as -msse4.2.  */
-@@ -277,6 +278,7 @@ along with GCC; see the file COPYING3.  If not see
-   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
- #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
- #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
-+#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
- 
- /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
-    as -mno-sse4.1. */
-@@ -1140,6 +1142,19 @@ ix86_handle_option (struct gcc_options *opts,
- 	}
-       return true;
- 
-+    case OPT_mprefetchi:
-+      if (value)
-+	{
-+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
-+	}
-+      else
-+	{
-+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
-+	}
-+      return true;
-+
-     case OPT_mfma:
-       if (value)
- 	{
-diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
-index 8f22897de..95b078acf 100644
---- a/gcc/common/config/i386/i386-cpuinfo.h
-+++ b/gcc/common/config/i386/i386-cpuinfo.h
-@@ -241,6 +241,7 @@ enum processor_features
-   FEATURE_X86_64_V3,
-   FEATURE_X86_64_V4,
-   FEATURE_AMX_FP16,
-+  FEATURE_PREFETCHI,
-   CPU_FEATURE_MAX
- };
- 
-diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
-index 95bab6da2..6caf06249 100644
---- a/gcc/common/config/i386/i386-isas.h
-+++ b/gcc/common/config/i386/i386-isas.h
-@@ -176,4 +176,5 @@ ISA_NAMES_TABLE_START
-   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
-   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
-   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
-+  ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
- ISA_NAMES_TABLE_END
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index e2b4a23dc..81012c651 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
- 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
- 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
- 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
--		       amxfp16intrin.h"
-+		       amxfp16intrin.h prfchiintrin.h"
- 	;;
- ia64-*-*)
- 	extra_headers=ia64intrin.h
-diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
-index d6cd8d1bf..21100149a 100644
---- a/gcc/config/i386/cpuid.h
-+++ b/gcc/config/i386/cpuid.h
-@@ -50,6 +50,7 @@

_service:tar_scm:0077-Initial-Granite-Rapids-Support.patch Deleted

@@ -1,277 +0,0 @@
-From 7f0f8b585cf60b4c09bca42b5339995c2cc74633 Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Mon, 7 Nov 2022 11:04:57 +0800
-Subject: [PATCH 22/32] Initial Granite Rapids Support
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h
-	(get_intel_cpu): Handle Granite Rapids.
-	* common/config/i386/i386-common.cc:
-	(processor_names): Add graniterapids.
-	(processor_alias_table): Ditto.
-	* common/config/i386/i386-cpuinfo.h
-	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS.
-	* config.gcc: Add -march=graniterapids.
-	* config/i386/driver-i386.cc (host_detect_local_cpu):
-	Handle graniterapids.
-	* config/i386/i386-c.cc (ix86_target_macros_internal):
-	Ditto.
-	* config/i386/i386-options.cc (m_GRANITERAPIDS): New.
-	(processor_cost_table): Add graniterapids.
-	* config/i386/i386.h (enum processor_type):
-	Add PROCESSOR_GRANITERAPIDS.
-	(PTA_GRANITERAPIDS): Ditto.
-	* doc/extend.texi: Add graniterapids.
-	* doc/invoke.texi: Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.target/i386/mv16.C: Add graniterapids.
-	* gcc.target/i386/funcspec-56.inc: Handle new march.
-
-(cherry picked from commit 339ffc5a792dd66647392a235f2f7f6344c5359e)
----
- gcc/common/config/i386/cpuinfo.h              |  9 +++++++++
- gcc/common/config/i386/i386-common.cc         |  3 +++
- gcc/common/config/i386/i386-cpuinfo.h         |  1 +
- gcc/config.gcc                                |  2 +-
- gcc/config/i386/driver-i386.cc                |  5 ++++-
- gcc/config/i386/i386-c.cc                     |  7 +++++++
- gcc/config/i386/i386-options.cc               |  4 +++-
- gcc/config/i386/i386.h                        |  3 +++
- gcc/doc/extend.texi                           |  3 +++
- gcc/doc/invoke.texi                           | 11 +++++++++++
- gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
- gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
- 12 files changed, 52 insertions(+), 3 deletions(-)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index f17e88144..1f75ff1ca 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -528,6 +528,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       cpu_model->__cpu_type = INTEL_COREI7;
-       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
-       break;
-+    case 0xad:
-+    case 0xae:
-+      /* Granite Rapids.  */
-+      cpu = "graniterapids";
-+      CHECK___builtin_cpu_is ("corei7");
-+      CHECK___builtin_cpu_is ("graniterapids");
-+      cpu_model->__cpu_type = INTEL_COREI7;
-+      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
-+      break;
-     case 0x17:
-     case 0x1d:
-       /* Penryn.  */
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index c8cf532cf..1aa163463 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -1855,6 +1855,7 @@ const char *const processor_names[] =
-   "sapphirerapids",
-   "alderlake",
-   "rocketlake",
-+  "graniterapids",
-   "intel",
-   "geode",
-   "k6",
-@@ -1973,6 +1974,8 @@ const pta processor_alias_table[] =
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-+  {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
-+    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
-   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
-   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
-index 95b078acf..7b2d4d242 100644
---- a/gcc/common/config/i386/i386-cpuinfo.h
-+++ b/gcc/common/config/i386/i386-cpuinfo.h
-@@ -92,6 +92,7 @@ enum processor_subtypes
-   AMDFAM19H_ZNVER3,
-   INTEL_COREI7_ROCKETLAKE,
-   AMDFAM19H_ZNVER4,
-+  INTEL_COREI7_GRANITERAPIDS,
-   CPU_SUBTYPE_MAX
- };
- 
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index 81012c651..9bad238e3 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -670,7 +670,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
- silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
- skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
- sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
--nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
-+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
- 
- # Additional x86 processors supported by --with-cpu=.  Each processor
- # MUST be separated by exactly one space.
-diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
-index 3b5161aed..ea8c3d8d1 100644
---- a/gcc/config/i386/driver-i386.cc
-+++ b/gcc/config/i386/driver-i386.cc
-@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
- 	      /* This is unknown family 0x6 CPU.  */
- 	      if (has_feature (FEATURE_AVX))
- 		{
-+		  /* Assume Granite Rapids.  */
-+		  if (has_feature (FEATURE_AMX_FP16))
-+		    cpu = "graniterapids";
- 		  /* Assume Tiger Lake */
--		  if (has_feature (FEATURE_AVX512VP2INTERSECT))
-+		  else if (has_feature (FEATURE_AVX512VP2INTERSECT))
- 		    cpu = "tigerlake";
- 		  /* Assume Sapphire Rapids.  */
- 		  else if (has_feature (FEATURE_TSXLDTRK))
-diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
-index 00880bd17..04f1dd682 100644
---- a/gcc/config/i386/i386-c.cc
-+++ b/gcc/config/i386/i386-c.cc
-@@ -242,6 +242,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
-       def_or_undef (parse_in, "__sapphirerapids");
-       def_or_undef (parse_in, "__sapphirerapids__");
-       break;
-+    case PROCESSOR_GRANITERAPIDS:
-+      def_or_undef (parse_in, "__graniterapids");
-+      def_or_undef (parse_in, "__graniterapids__");
-+      break;
-     case PROCESSOR_ALDERLAKE:
-       def_or_undef (parse_in, "__alderlake");
-       def_or_undef (parse_in, "__alderlake__");
-@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
-     case PROCESSOR_ROCKETLAKE:
-       def_or_undef (parse_in, "__tune_rocketlake__");
-       break;
-+    case PROCESSOR_GRANITERAPIDS:
-+      def_or_undef (parse_in, "__tune_graniterapids__");
-+      break;
-     case PROCESSOR_INTEL:
-     case PROCESSOR_GENERIC:
-       break;
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index 724375f02..6645e3259 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -127,10 +127,11 @@ along with GCC; see the file COPYING3.  If not see
- #define m_SAPPHIRERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_SAPPHIRERAPIDS)
- #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
- #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
-+#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
- #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
- 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
- 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
--		       | m_ROCKETLAKE)
-+		       | m_ROCKETLAKE | m_GRANITERAPIDS)
- #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
- #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2)
- #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
-@@ -761,6 +762,7 @@ static const struct processor_costs *processor_cost_table[] =
-   &icelake_cost,
-   &alderlake_cost,
-   &icelake_cost,
-+  &icelake_cost,
-   &intel_cost,
-   &geode_cost,
-   &k6_cost,
-diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
-index aaa136ba0..75953defc 100644
---- a/gcc/config/i386/i386.h
-+++ b/gcc/config/i386/i386.h
-@@ -2250,6 +2250,7 @@ enum processor_type
-   PROCESSOR_SAPPHIRERAPIDS,
-   PROCESSOR_ALDERLAKE,
-   PROCESSOR_ROCKETLAKE,
-+  PROCESSOR_GRANITERAPIDS,
-   PROCESSOR_INTEL,
-   PROCESSOR_GEODE,
-   PROCESSOR_K6,
-@@ -2356,6 +2357,8 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
-   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
-   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
-   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-+constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
-+  | PTA_PREFETCHI;

_service:tar_scm:0078-Support-Intel-AMX-COMPLEX.patch Deleted

@@ -1,722 +0,0 @@
-From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Wed, 31 May 2023 10:45:00 +0800
-Subject: [PATCH 23/32] Support Intel AMX-COMPLEX
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h (get_available_features):
-	Detect AMX-COMPLEX.
-	* common/config/i386/i386-common.cc
-	(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
-	OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
-	(ix86_handle_option): Handle -mamx-complex.
-	* common/config/i386/i386-cpuinfo.h (enum processor_features):
-	Add FEATURE_AMX_COMPLEX.
-	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
-	amx-complex.
-	* config.gcc: Add amxcomplexintrin.h.
-	* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
-	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
-	__AMX_COMPLEX__.
-	* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
-	* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
-	Handle amx-complex.
-	* config/i386/i386.opt: Add option -mamx-complex.
-	* config/i386/immintrin.h: Include amxcomplexintrin.h.
-	* doc/extend.texi: Document amx-complex.
-	* doc/invoke.texi: Document -mamx-complex.
-	* doc/sourcebuild.texi: Document target amx-complex.
-	* config/i386/amxcomplexintrin.h: New file.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.dg/other/i386-2.C: Add -mamx-complex.
-	* g++.dg/other/i386-3.C: Ditto.
-	* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
-	* gcc.target/i386/amx-helper.h: Add amx-complex support.
-	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
-	* gcc.target/i386/sse-12.c: Add -mamx-complex.
-	* gcc.target/i386/sse-13.c: Ditto.
-	* gcc.target/i386/sse-14.c: Ditto.
-	* gcc.target/i386/sse-22.c: Add amx-complex.
-	* gcc.target/i386/sse-23.c: Ditto.
-	* lib/target-supports.exp (check_effective_target_amx_complex): New.
-	* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
-	* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
-	* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
-	* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
----
- gcc/common/config/i386/cpuinfo.h              |  2 +
- gcc/common/config/i386/i386-common.cc         | 19 +++++-
- gcc/common/config/i386/i386-cpuinfo.h         |  1 +
- gcc/common/config/i386/i386-isas.h            |  2 +
- gcc/config.gcc                                |  2 +-
- gcc/config/i386/amxcomplexintrin.h            | 59 +++++++++++++++++++
- gcc/config/i386/cpuid.h                       |  1 +
- gcc/config/i386/i386-c.cc                     |  2 +
- gcc/config/i386/i386-isa.def                  |  1 +
- gcc/config/i386/i386-options.cc               |  4 +-
- gcc/config/i386/i386.opt                      |  4 ++
- gcc/config/i386/immintrin.h                   |  2 +
- gcc/doc/extend.texi                           |  5 ++
- gcc/doc/invoke.texi                           |  7 ++-
- gcc/doc/sourcebuild.texi                      |  3 +
- gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
- gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
- gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
- gcc/testsuite/gcc.target/i386/amx-helper.h    |  4 +-
- .../gcc.target/i386/amxcomplex-asmatt-1.c     | 15 +++++
- .../gcc.target/i386/amxcomplex-asmintel-1.c   | 12 ++++
- .../i386/amxcomplex-cmmimfp16ps-2.c           | 53 +++++++++++++++++
- .../i386/amxcomplex-cmmrlfp16ps-2.c           | 53 +++++++++++++++++
- gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
- gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
- gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
- gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
- gcc/testsuite/lib/target-supports.exp         | 11 ++++
- 30 files changed, 268 insertions(+), 15 deletions(-)
- create mode 100644 gcc/config/i386/amxcomplexintrin.h
- create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
- create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
- create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
- create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 1f75ff1ca..39d3351db 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model,
- 	{
- 	  if (eax & bit_AMX_FP16)
- 	    set_feature (FEATURE_AMX_FP16);
-+	  if (edx & bit_AMX_COMPLEX)
-+	    set_feature (FEATURE_AMX_COMPLEX);
- 	}
-     }
- 
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index 1aa163463..87e8afe9b 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
- #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
- #define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
-+#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \
-+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
- 
- /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
-    as -msse4.2.  */
-@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
- #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
- #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
--#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
-+#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
-+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
- #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
- #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
- #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
-@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3.  If not see
- #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
- #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
- #define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
-+#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
- 
- /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
-    as -mno-sse4.1. */
-@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts,
- 	}
-       return true;
- 
-+    case OPT_mamx_complex:
-+      if (value)
-+	{
-+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
-+	}
-+      else
-+	{
-+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
-+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
-+	}
-+      return true;
-+
-     case OPT_mfma:
-       if (value)
- 	{
-diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
-index 7b2d4d242..56020faac 100644
---- a/gcc/common/config/i386/i386-cpuinfo.h
-+++ b/gcc/common/config/i386/i386-cpuinfo.h
-@@ -243,6 +243,7 @@ enum processor_features
-   FEATURE_X86_64_V4,
-   FEATURE_AMX_FP16,
-   FEATURE_PREFETCHI,
-+  FEATURE_AMX_COMPLEX,
-   CPU_FEATURE_MAX
- };
- 
-diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
-index 6caf06249..cbef68479 100644
---- a/gcc/common/config/i386/i386-isas.h
-+++ b/gcc/common/config/i386/i386-isas.h
-@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START
-   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
-   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
-   ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
-+  ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX,
-+			P_NONE, "-mamx-complex")
- ISA_NAMES_TABLE_END
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index 9bad238e3..ca5c8f8a0 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
- 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
- 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
- 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
--		       amxfp16intrin.h prfchiintrin.h"
-+		       amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h"
- 	;;
- ia64-*-*)
- 	extra_headers=ia64intrin.h
-diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h
-new file mode 100644
-index 000000000..6ea1eca04
---- /dev/null
-+++ b/gcc/config/i386/amxcomplexintrin.h
-@@ -0,0 +1,59 @@
-+/* Copyright (C) 2023 Free Software Foundation, Inc.
-+
-+   This file is part of GCC.
-+
-+   GCC is free software; you can redistribute it and/or modify
-+   it under the terms of the GNU General Public License as published by
-+   the Free Software Foundation; either version 3, or (at your option)

_service:tar_scm:0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch Deleted

@@ -1,30 +0,0 @@
-From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Fri, 31 Mar 2023 10:49:14 +0800
-Subject: [PATCH 24/32] i386: Add AMX-COMPLEX to Granite Rapids
-
-gcc/Changelog:
-
-	* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
-
-(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
----
- gcc/config/i386/i386.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
-index 75953defc..56d7794dc 100644
---- a/gcc/config/i386/i386.h
-+++ b/gcc/config/i386/i386.h
-@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
-   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
-   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
- constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
--  | PTA_PREFETCHI;
-+  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
- constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
-   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
- constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
--- 
-2.28.0.windows.1
-

_service:tar_scm:0080-Initial-Granite-Rapids-D-Support.patch Deleted

@@ -1,212 +0,0 @@
-From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
-From: "Mo, Zewei" <zewei.mo@intel.com>
-Date: Mon, 3 Jul 2023 11:00:26 +0800
-Subject: [PATCH 25/32] Initial Granite Rapids D Support
-
-gcc/ChangeLog:
-
-	* common/config/i386/cpuinfo.h
-	(get_intel_cpu): Handle Granite Rapids D.
-	* common/config/i386/i386-common.cc:
-	(processor_alias_table): Add graniterapids-d.
-	* common/config/i386/i386-cpuinfo.h
-	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
-	* config.gcc: Add -march=graniterapids-d.
-	* config/i386/driver-i386.cc (host_detect_local_cpu):
-	Handle graniterapids-d.
-	* config/i386/i386.h: (PTA_GRANITERAPIDS_D): New.
-	* doc/extend.texi: Add graniterapids-d.
-	* doc/invoke.texi: Ditto.
-
-gcc/testsuite/ChangeLog:
-
-	* g++.target/i386/mv16.C: Add graniterapids-d.
-	* gcc.target/i386/funcspec-56.inc: Handle new march.
-
-(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
----
- gcc/common/config/i386/cpuinfo.h              |  9 ++++++++-
- gcc/common/config/i386/i386-common.cc         |  2 ++
- gcc/common/config/i386/i386-cpuinfo.h         |  1 +
- gcc/config.gcc                                |  3 ++-
- gcc/config/i386/driver-i386.cc                |  5 ++++-
- gcc/config/i386/i386.h                        |  4 +++-
- gcc/doc/extend.texi                           |  3 +++
- gcc/doc/invoke.texi                           | 11 +++++++++++
- gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
- gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
- 10 files changed, 41 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 39d3351db..1e53248ef 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
-       break;
-     case 0xad:
--    case 0xae:
-       /* Granite Rapids.  */
-       cpu = "graniterapids";
-       CHECK___builtin_cpu_is ("corei7");
-@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
-       cpu_model->__cpu_type = INTEL_COREI7;
-       cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
-       break;
-+    case 0xae:
-+      /* Granite Rapids D.  */
-+      cpu = "graniterapids-d";
-+      CHECK___builtin_cpu_is ("corei7");
-+      CHECK___builtin_cpu_is ("graniterapids-d");
-+      cpu_model->__cpu_type = INTEL_COREI7;
-+      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
-+      break;
-     case 0x17:
-     case 0x1d:
-       /* Penryn.  */
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index 87e8afe9b..28f468f48 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -1993,6 +1993,8 @@ const pta processor_alias_table =
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
-     M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
-+  {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
-+    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
-   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
-   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
-index 56020faac..a32f32c97 100644
---- a/gcc/common/config/i386/i386-cpuinfo.h
-+++ b/gcc/common/config/i386/i386-cpuinfo.h
-@@ -93,6 +93,7 @@ enum processor_subtypes
-   INTEL_COREI7_ROCKETLAKE,
-   AMDFAM19H_ZNVER4,
-   INTEL_COREI7_GRANITERAPIDS,
-+  INTEL_COREI7_GRANITERAPIDS_D,
-   CPU_SUBTYPE_MAX
- };
- 
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index ca5c8f8a0..3108ac4eb 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
- silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
- skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
- sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
--nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
-+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
-+graniterapids-d native"
- 
- # Additional x86 processors supported by --with-cpu=.  Each processor
- # MUST be separated by exactly one space.
-diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
-index ea8c3d8d1..e3bca4b49 100644
---- a/gcc/config/i386/driver-i386.cc
-+++ b/gcc/config/i386/driver-i386.cc
-@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
- 	      /* This is unknown family 0x6 CPU.  */
- 	      if (has_feature (FEATURE_AVX))
- 		{
-+		  /* Assume Granite Rapids D.  */
-+		  if (has_feature (FEATURE_AMX_COMPLEX))
-+		    cpu = "graniterapids-d";
- 		  /* Assume Granite Rapids.  */
--		  if (has_feature (FEATURE_AMX_FP16))
-+		  else if (has_feature (FEATURE_AMX_FP16))
- 		    cpu = "graniterapids";
- 		  /* Assume Tiger Lake */
- 		  else if (has_feature (FEATURE_AVX512VP2INTERSECT))
-diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
-index 56d7794dc..eda3e5e5b 100644
---- a/gcc/config/i386/i386.h
-+++ b/gcc/config/i386/i386.h
-@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
-   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
-   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
- constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
--  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
-+  | PTA_PREFETCHI;
-+constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
-+  | PTA_AMX_COMPLEX;
- constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
-   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
- constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
-diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
-index d7b0bc802..674db2f1a 100644
---- a/gcc/doc/extend.texi
-+++ b/gcc/doc/extend.texi
-@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
- @item graniterapids
- Intel Core i7 graniterapids CPU.
- 
-+@item graniterapids-d
-+Intel Core i7 graniterapids D CPU.
-+
- @item bonnell
- Intel Atom Bonnell CPU.
- 
-diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
-index 186b33481..a2ec060fd 100644
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
- SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
- AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
- 
-+@item graniterapids-d
-+Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
-+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
-+AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
-+AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
-+VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
-+MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
-+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
-+AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
-+
- @item k6
- AMD K6 CPU with MMX instruction set support.
- 
-diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
-index 65cc24f32..17b1fc722 100644
---- a/gcc/testsuite/g++.target/i386/mv16.C
-+++ b/gcc/testsuite/g++.target/i386/mv16.C
-@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
-   return 26;
- }
- 
-+int __attribute__ ((target("arch=graniterapids-d"))) foo () {
-+  return 28;
-+}
-+
- int main ()
- {
-   int val = foo ();
-@@ -136,6 +140,8 @@ int main ()
-     assert (val == 24);
-   else if (__builtin_cpu_is ("graniterapids"))
-     assert (val == 25);
-+  else if (__builtin_cpu_is ("graniterapids-d"))
-+    assert (val == 26);
-   else
-     assert (val == 0);
- 
-diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
-index 1a2f3b83d..f0f3397a7 100644

_service:tar_scm:0081-Correct-Granite-Rapids-D-documentation.patch Deleted

@@ -1,48 +0,0 @@
-From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
-From: Haochen Jiang <haochen.jiang@intel.com>
-Date: Thu, 20 Jul 2023 10:47:18 +0800
-Subject: [PATCH 26/32] Correct Granite Rapids{, D} documentation
-
-gcc/Changelog:
-
-	* doc/invoke.texi: Remove AVX512VP2INTERSECT in
-	Granite Rapids{, D} from documentation.
-
-(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
----
- gcc/doc/invoke.texi | 12 ++++++------
- 1 file changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
-index a2ec060fd..4d3eccdb2 100644
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
- AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
- AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
- VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
--MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
--SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
--AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
-+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
-+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
-+and PREFETCHI instruction set support.
- 
- @item graniterapids-d
- Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
- AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
- AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
- VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
--MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
--SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
--AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
-+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
-+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
-+PREFETCHI and AMX-COMPLEX instruction set support.
- 
- @item k6
- AMD K6 CPU with MMX instruction set support.
--- 
-2.28.0.windows.1
-

_service:tar_scm:0082-i386-Remove-Meteorlake-s-family_model.patch Deleted

_service:tar_scm:0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch Deleted

@@ -1,33 +0,0 @@
-From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
-From: "Cui, Lili" <lili.cui@intel.com>
-Date: Thu, 29 Jun 2023 03:10:35 +0000
-Subject: [PATCH 28/32] x86: Update model values for Alderlake, Rocketlake and
- Raptorlake.
-
-Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
-
-gcc/ChangeLog
-
-	* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
-	from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
-
-(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
----
- gcc/common/config/i386/cpuinfo.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
-index 348bc0c12..f9bcb6fad 100644
---- a/gcc/common/config/i386/cpuinfo.h
-+++ b/gcc/common/config/i386/cpuinfo.h
-@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
-     case 0x9a:
-       /* Alder Lake.  */
-     case 0xb7:
-+    case 0xbf:
-       /* Raptor Lake.  */
-     case 0xaa:
-     case 0xac:
--- 
-2.28.0.windows.1
-

_service:tar_scm:0084-x86-Update-model-values-for-Raptorlake.patch Deleted

_service:tar_scm:0085-Fix-target_clone-arch-graniterapids-d.patch Deleted

@@ -1,159 +0,0 @@
-From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
-From: liuhongt <hongtao.liu@intel.com>
-Date: Tue, 22 Aug 2023 18:18:31 +0800
-Subject: [PATCH 30/32] Fix target_clone ("arch=graniterapids-d")
-
-Both "graniterapids-d" and "graniterapids" are attached with
-PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
-different __cpu_subtype in get_intel_cpu.
-
-And get_builtin_code_for_version will try to match the first
-PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to
-"graniterapids" here.
-
-1861      else if (new_target->arch_specified && new_target->arch > 0)
-1862        for (i = 0; i < pta_size; i++)
-1863          if (processor_alias_table[i].processor == new_target->arch)
-1864            {
-1865              const pta *arch_info = &processor_alias_table[i];
-1866              switch (arch_info->priority)
-1867                {
-1868                default:
-1869                  arg_str = arch_info->name;
-
-This mismatch makes dispatch_function_versions check the predicate
-of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
-the issue.
-The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
-
-For "alderlake", "raptorlake" and "meteorlake", they share the same isa,
-cost and tuning, and are mapped to the same __cpu_type/__cpu_subtype in
-get_intel_cpu, so there is no need to add PROCESSOR_RAPTORLAKE and others.
-
-gcc/ChangeLog:
-
-	* common/config/i386/i386-common.cc (processor_names): Add new
-	member graniterapids-d.
-	* config/i386/i386-options.cc (processor_alias_table): Update
-	table with PROCESSOR_GRANITERAPIDS_D.
-	(m_GRANITERAPIDS_D): New macro.
-	(m_CORE_AVX512): Add m_GRANITERAPIDS_D.
-	(processor_cost_table): Add icelake_cost for
-	PROCESSOR_GRANITERAPIDS_D.
-	* config/i386/i386.h (enum processor_type): Add new member
-	PROCESSOR_GRANITERAPIDS_D.
-	* config/i386/i386-c.cc (ix86_target_macros_internal): Handle
-	PROCESSOR_GRANITERAPIDS_D.
----
- gcc/common/config/i386/i386-common.cc | 6 ++++--
- gcc/config/i386/i386-c.cc             | 8 ++++++++
- gcc/config/i386/i386-options.cc       | 4 +++-
- gcc/config/i386/i386.h                | 3 ++-
- 4 files changed, 17 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
-index 28f468f48..bec6801ce 100644
---- a/gcc/common/config/i386/i386-common.cc
-+++ b/gcc/common/config/i386/i386-common.cc
-@@ -1873,6 +1873,7 @@ const char *const processor_names =
-   "alderlake",
-   "rocketlake",
-   "graniterapids",
-+  "graniterapids-d",
-   "intel",
-   "geode",
-   "k6",
-@@ -1993,8 +1994,9 @@ const pta processor_alias_table =
-     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
-   {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
-     M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
--  {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
--    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
-+  {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
-+    PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
-+    P_PROC_AVX512F},
-   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
-   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
-index 5e0ac278c..49f0db2b8 100644
---- a/gcc/config/i386/i386-c.cc
-+++ b/gcc/config/i386/i386-c.cc
-@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
-       def_or_undef (parse_in, "__graniterapids");
-       def_or_undef (parse_in, "__graniterapids__");
-       break;
-+    case PROCESSOR_GRANITERAPIDS_D:
-+      def_or_undef (parse_in, "__graniterapids_d");
-+      def_or_undef (parse_in, "__graniterapids_d__");
-+      break;
-     case PROCESSOR_ALDERLAKE:
-       def_or_undef (parse_in, "__alderlake");
-       def_or_undef (parse_in, "__alderlake__");
-@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
-       def_or_undef (parse_in, "__rocketlake");
-       def_or_undef (parse_in, "__rocketlake__");
-       break;
-+
-     /* use PROCESSOR_max to not set/unset the arch macro.  */
-     case PROCESSOR_max:
-       break;
-@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
-     case PROCESSOR_GRANITERAPIDS:
-       def_or_undef (parse_in, "__tune_graniterapids__");
-       break;
-+    case PROCESSOR_GRANITERAPIDS_D:
-+      def_or_undef (parse_in, "__tune_graniterapids_d__");
-+      break;
-     case PROCESSOR_INTEL:
-     case PROCESSOR_GENERIC:
-       break;
-diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
-index 7efd25084..86932d719 100644
---- a/gcc/config/i386/i386-options.cc
-+++ b/gcc/config/i386/i386-options.cc
-@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3.  If not see
- #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
- #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
- #define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
-+#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
- #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
- 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
- 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
--		       | m_ROCKETLAKE | m_GRANITERAPIDS)
-+		       | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
- #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
- #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_CORE_AVX2)
- #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
-@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table =
-   &alderlake_cost,
-   &icelake_cost,
-   &icelake_cost,
-+  &icelake_cost,
-   &intel_cost,
-   &geode_cost,
-   &k6_cost,
-diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
-index eda3e5e5b..5052f878d 100644
---- a/gcc/config/i386/i386.h
-+++ b/gcc/config/i386/i386.h
-@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_mapFIRST_PSEUDO_REGISTER;
- #define DEFAULT_LARGE_SECTION_THRESHOLD 65536
- &#xc;
- /* Which processor to tune code generation for.  These must be in sync
--   with processor_target_table in i386.cc.  */ 
-+   with processor_cost_table in i386-options.cc.  */
- 
- enum processor_type
- {
-@@ -2251,6 +2251,7 @@ enum processor_type
-   PROCESSOR_ALDERLAKE,
-   PROCESSOR_ROCKETLAKE,
-   PROCESSOR_GRANITERAPIDS,
-+  PROCESSOR_GRANITERAPIDS_D,
-   PROCESSOR_INTEL,
-   PROCESSOR_GEODE,
-   PROCESSOR_K6,
--- 
-2.28.0.windows.1
-

_service:tar_scm:0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch Deleted

@@ -1,321 +0,0 @@
-From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
-From: "Vladimir N. Makarov" <vmakarov@redhat.com>
-Date: Thu, 26 Oct 2023 09:50:40 -0400
-Subject: [PATCH 31/32] Modfify cost calculation for dealing with equivalences
-
-RISCV target developers reported that pseudos with equivalence used in
-a loop can be spilled.  Simple changes of heuristics of cost
-calculation of pseudos with equivalence or even ignoring equivalences
-resulted in numerous testsuite failures on different targets or worse
-spec2017 performance.  This patch implements more sophisticated cost
-calculations of pseudos with equivalences.  The patch does not change
-RA behaviour for targets still using the old reload pass instead of
-LRA.  The patch solves the reported problem and improves x86-64
-specint2017 a bit (specfp2017 performance stays the same).  The patch
-takes into account how the equivalence will be used: will it be
-integrated into the user insns or require an input reload insn.  It
-requires additional pass over insns.  To compensate RA slow down, the
-patch removes a pass over insns in the reload pass used by IRA before.
-This also decouples IRA from reload more and will help to remove the
-reload pass in the future if it ever happens.
-
-gcc/ChangeLog:
-
-	* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
-	LRA is used.
-	* ira-costs.cc: Include regset.h.
-	(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
-	New functions.
-	(find_costs_and_classes): Call calculate_equiv_gains and redefine
-	mem_cost of pseudos with equivs when LRA is used.
-	* var-tracking.cc: Include ira.h and lra.h.
-	(vt_initialize): Use lra_eliminate_regs when LRA is used.
----
- gcc/dwarf2out.cc    |   4 +-
- gcc/ira-costs.cc    | 169 ++++++++++++++++++++++++++++++++++++++++++--
- gcc/var-tracking.cc |  14 +++-
- 3 files changed, 179 insertions(+), 8 deletions(-)
-
-diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
-index 0a5c081d8..f0f6f4fd4 100644
---- a/gcc/dwarf2out.cc
-+++ b/gcc/dwarf2out.cc
-@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
-      argument pointer and soft frame pointer rtx's.
-      Use DW_OP_fbreg offset DW_OP_stack_value in this case.  */
-   if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
--      && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
-+      && (ira_use_lra_p
-+	  ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
-+	  : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
-     {
-       dw_loc_descr_ref result = NULL;
- 
-diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
-index 642fda529..c79311783 100644
---- a/gcc/ira-costs.cc
-+++ b/gcc/ira-costs.cc
-@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "tm_p.h"
- #include "insn-config.h"
- #include "regs.h"
-+#include "regset.h"
- #include "ira.h"
- #include "ira-int.h"
- #include "addresses.h"
-@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
-     process_bb_for_costs (bb);
- }
- 
-+/* Check that reg REGNO can be changed by TO in INSN.  Return true in case the
-+   result insn would be valid one.  */
-+static bool
-+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
-+{
-+  validate_replace_src_group (regno_reg_rtxregno, to, insn);
-+  bool res = verify_changes (0);
-+  cancel_changes (0);
-+  return res;
-+}
-+
-+/* Return true if X contains a pseudo with equivalence.  In this case also
-+   return the pseudo through parameter REG.  If the pseudo is a part of subreg,
-+   return the subreg through parameter SUBREG.  */
-+
-+static bool
-+get_equiv_regno (rtx x, int &regno, rtx &subreg)
-+{
-+  subreg = NULL_RTX;
-+  if (GET_CODE (x) == SUBREG)
-+    {
-+      subreg = x;
-+      x = SUBREG_REG (x);
-+    }
-+  if (REG_P (x)
-+      && (ira_reg_equivREGNO (x).memory != NULL
-+	  || ira_reg_equivREGNO (x).constant != NULL))
-+    {
-+      regno = REGNO (x);
-+      return true;
-+    }
-+  RTX_CODE code = GET_CODE (x);
-+  const char *fmt = GET_RTX_FORMAT (code);
-+
-+  for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
-+    if (fmti == 'e')
-+      {
-+	if (get_equiv_regno (XEXP (x, i), regno, subreg))
-+	  return true;
-+      }
-+    else if (fmti == 'E')
-+      {
-+	for (int j = 0; j < XVECLEN (x, i); j++)
-+	  if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
-+	    return true;
-+      }
-+  return false;
-+}
-+
-+/* A pass through the current function insns.  Calculate costs of using
-+   equivalences for pseudos and store them in regno_equiv_gains.  */
-+
-+static void
-+calculate_equiv_gains (void)
-+{
-+  basic_block bb;
-+  int regno, freq, cost;
-+  rtx subreg;
-+  rtx_insn *insn;
-+  machine_mode mode;
-+  enum reg_class rclass;
-+  bitmap_head equiv_pseudos;
-+
-+  ira_assert (allocno_p);
-+  bitmap_initialize (&equiv_pseudos, &reg_obstack);
-+  for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
-+    if (ira_reg_equivregno.init_insns != NULL
-+	&& (ira_reg_equivregno.memory != NULL
-+	    || (ira_reg_equivregno.constant != NULL
-+		/* Ignore complicated constants which probably will be placed
-+		   in memory:  */
-+		&& GET_CODE (ira_reg_equivregno.constant) != CONST_DOUBLE
-+		&& GET_CODE (ira_reg_equivregno.constant) != CONST_VECTOR
-+		&& GET_CODE (ira_reg_equivregno.constant) != LABEL_REF)))
-+      {
-+	rtx_insn_list *x;
-+	for (x = ira_reg_equivregno.init_insns; x != NULL; x = x->next ())
-+	  {
-+	    insn = x->insn ();
-+	    rtx set = single_set (insn);
-+
-+	    if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtxregno)
-+	      break;
-+	    bb = BLOCK_FOR_INSN (insn);
-+	    ira_curr_regno_allocno_map
-+	      = ira_bb_nodesbb->index.parent->regno_allocno_map;
-+	    mode = PSEUDO_REGNO_MODE (regno);
-+	    rclass = prefCOST_INDEX (regno);
-+	    ira_init_register_move_cost_if_necessary (mode);
-+	    if (ira_reg_equivregno.memory != NULL)
-+	      cost = ira_memory_move_costmoderclass1;
-+	    else
-+	      cost = ira_register_move_costmoderclassrclass;
-+	    freq = REG_FREQ_FROM_BB (bb);
-+	    regno_equiv_gainsregno += cost * freq;
-+	  }
-+	if (x != NULL)
-+	  /* We found complicated equiv or reverse equiv mem=reg.  Ignore
-+	     them.  */
-+	  regno_equiv_gainsregno = 0;
-+	else
-+	  bitmap_set_bit (&equiv_pseudos, regno);
-+      }
-+
-+  FOR_EACH_BB_FN (bb, cfun)
-+    {
-+      freq = REG_FREQ_FROM_BB (bb);
-+      ira_curr_regno_allocno_map
-+	= ira_bb_nodesbb->index.parent->regno_allocno_map;
-+      FOR_BB_INSNS (bb, insn)
-+	{
-+	  if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
-+	      || !bitmap_bit_p (&equiv_pseudos, regno))
-+	    continue;
-+	  rtx subst = ira_reg_equivregno.memory;
-+
-+	  if (subst == NULL)
-+	    subst = ira_reg_equivregno.constant;
-+	  ira_assert (subst != NULL);
-+	  mode = PSEUDO_REGNO_MODE (regno);
-+	  ira_init_register_move_cost_if_necessary (mode);
-+	  bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
-+
-+	  rclass = prefCOST_INDEX (regno);
-+	  if (MEM_P (subst)
-+	      /* If it is a change of constant into double for example, the
-+		 result constant probably will be placed in memory.  */
-+	      || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
-+	    cost = ira_memory_move_costmoderclass1 + (consumed_p ? 0 : 1);
-+	  else if (consumed_p)

_service:tar_scm:0087-Add-cost-calculation-for-reg-equivalence-invariants.patch Deleted

@@ -1,49 +0,0 @@
-From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001
-From: "Vladimir N. Makarov" <vmakarov@redhat.com>
-Date: Fri, 27 Oct 2023 08:28:24 -0400
-Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants
-
-My recent patch improving cost calculation for pseudos with equivalence
-resulted in failure of gcc.target/arm/eliminate.c on aarch64.  This patch
-fixes this failure.
-
-gcc/ChangeLog:
-
-	* ira-costs.cc: (get_equiv_regno, calculate_equiv_gains):
-	Process reg equivalence invariants.
----
- gcc/ira-costs.cc | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
-index c79311783..d33104a30 100644
---- a/gcc/ira-costs.cc
-+++ b/gcc/ira-costs.cc
-@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg)
-     }
-   if (REG_P (x)
-       && (ira_reg_equivREGNO (x).memory != NULL
-+	  || ira_reg_equivREGNO (x).invariant != NULL
- 	  || ira_reg_equivREGNO (x).constant != NULL))
-     {
-       regno = REGNO (x);
-@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void)
-   for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
-     if (ira_reg_equivregno.init_insns != NULL
- 	&& (ira_reg_equivregno.memory != NULL
-+	    || ira_reg_equivregno.invariant != NULL
- 	    || (ira_reg_equivregno.constant != NULL
- 		/* Ignore complicated constants which probably will be placed
- 		   in memory:  */
-@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void)
- 
- 	  if (subst == NULL)
- 	    subst = ira_reg_equivregno.constant;
-+	  if (subst == NULL)
-+	    subst = ira_reg_equivregno.invariant;
- 	  ira_assert (subst != NULL);
- 	  mode = PSEUDO_REGNO_MODE (regno);
- 	  ira_init_register_move_cost_if_necessary (mode);
--- 
-2.28.0.windows.1
-

_service:tar_scm:LoongArch-Add-LA664-support.patch Deleted

@@ -1,332 +0,0 @@
-From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001
-From: Lulu Cheng <chenglulu@loongson.cn>
-Date: Thu, 16 Nov 2023 20:43:53 +0800
-Subject: [PATCH] LoongArch: Add LA664 support.
-
-Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1.
-Support the configure setting --with-arch =la664, and support -march=la664,-mtune=la664.
-
-gcc/ChangeLog:
-
-	* config.gcc: Support LA664.
-	* config/loongarch/genopts/loongarch-strings: Likewise.
-	* config/loongarch/genopts/loongarch.opt.in: Likewise.
-	* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise.
-	* config/loongarch/loongarch-def.c: Likewise.
-	* config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise.
-	(ISA_BASE_LA64V110): Define macro.
-	(N_ARCH_TYPES): Update value.
-	(N_TUNE_TYPES): Update value.
-	(CPU_LA664): New macro.
-	* config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise.
-	(isa_base_compat_p): Likewise.
-	* config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled
-	when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110.
-	(TARGET_uARCH_LA664): Define macro.
-	* config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise.
-	* config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width):
-	Add LA664 support.
-	* config/loongarch/loongarch.opt: Regenerate.
-
-Signed-off-by: ticat_fp <fanpeng@loongson.cn>
----
- gcc/config.gcc                                | 10 ++++-----
- .../loongarch/genopts/loongarch-strings       |  1 +
- gcc/config/loongarch/genopts/loongarch.opt.in |  3 +++
- gcc/config/loongarch/loongarch-cpu.cc         |  4 ++++
- gcc/config/loongarch/loongarch-def.c          | 21 +++++++++++++++++++
- gcc/config/loongarch/loongarch-def.h          |  8 ++++---
- gcc/config/loongarch/loongarch-opts.cc        |  8 +++----
- gcc/config/loongarch/loongarch-opts.h         |  4 +++-
- gcc/config/loongarch/loongarch-str.h          |  1 +
- gcc/config/loongarch/loongarch.cc             |  1 +
- gcc/config/loongarch/loongarch.opt            |  3 +++
- 11 files changed, 51 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config.gcc b/gcc/config.gcc
-index 6d51bd93f3f..b88591b6fd8 100644
---- a/gcc/config.gcc
-+++ b/gcc/config.gcc
-@@ -5039,7 +5039,7 @@ case "${target}" in
- 
- 		# Perform initial sanity checks on --with-* options.
- 		case ${with_arch} in
--		"" | abi-default | loongarch64 | la464) ;; # OK, append here.
-+		"" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
- 		native)
- 			if test x${host} != x${target}; then
- 				echo "--with-arch=native is illegal for cross-compiler." 1>&2
-@@ -5088,7 +5088,7 @@ case "${target}" in
- 		case ${abi_base}/${abi_ext} in
- 		lp64*/base)
- 			# architectures that support lp64* ABI
--			arch_pattern="native|abi-default|loongarch64|la464"
-+			arch_pattern="native|abi-default|loongarch64|la[46]64"
- 			# default architecture for lp64* ABI
- 			arch_default="abi-default"
- 			;;
-@@ -5163,7 +5163,7 @@ case "${target}" in
- 		# Check default with_tune configuration using with_arch.
- 		case ${with_arch} in
- 		loongarch64)
--			tune_pattern="native|abi-default|loongarch64|la464"
-+			tune_pattern="native|abi-default|loongarch64|la[46]64"
- 			;;
- 		*)
- 			# By default, $with_tune == $with_arch
-@@ -5219,7 +5219,7 @@ case "${target}" in
- 					# Fixed: use the default gcc configuration for all multilib
- 					# builds by default.
- 					with_multilib_default="" ;;
--				arch,native|arch,loongarch64|arch,la464) # OK, append here.
-+				arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
- 					with_multilib_default="/march=${component}" ;;
- 				arch,*)
- 					with_multilib_default="/march=abi-default"
-@@ -5307,7 +5307,7 @@ case "${target}" in
- 				if test x${parse_state} = x"arch"; then
- 					# -march option
- 					case ${component} in
--					native | abi-default | loongarch64 | la464) # OK, append here.
-+					native | abi-default | loongarch64 | la[46]64) # OK, append here.
- 						# Append -march spec for each multilib variant.
- 						loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
- 						parse_state="opts"
-diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
-index 8e412f7536e..7bc4824007e 100644
---- a/gcc/config/loongarch/genopts/loongarch-strings
-+++ b/gcc/config/loongarch/genopts/loongarch-strings
-@@ -26,6 +26,7 @@ STR_CPU_NATIVE	      native
- STR_CPU_ABI_DEFAULT   abi-default
- STR_CPU_LOONGARCH64   loongarch64
- STR_CPU_LA464	      la464
-+STR_CPU_LA664	      la664
- 
- # Base architecture
- STR_ISA_BASE_LA64V100 la64
-diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
-index 158701d327a..00b4733d75b 100644
---- a/gcc/config/loongarch/genopts/loongarch.opt.in
-+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
-@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64)
- EnumValue
- Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
- 
-+EnumValue
-+Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
-+
- m@@OPTSTR_ARCH@@=
- Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
- -m@@OPTSTR_ARCH@@=PROCESSOR	Generate code for the given PROCESSOR ISA.
-diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
-index 7a2866f60f9..f3a13414143 100644
---- a/gcc/config/loongarch/loongarch-cpu.cc
-+++ b/gcc/config/loongarch/loongarch-cpu.cc
-@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
-       native_cpu_type = CPU_LA464;
-       break;
- 
-+    case 0x0014d000:   /* LA664 */
-+      native_cpu_type = CPU_LA664;
-+      break;
-+
-     default:
-       /* Unknown PRID.  */
-       if (tune_native_p)
-diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
-index 430ef8b2d95..067629141b6 100644
---- a/gcc/config/loongarch/loongarch-def.c
-+++ b/gcc/config/loongarch/loongarch-def.c
-@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = {
-   [CPU_ABI_DEFAULT]	  = STR_CPU_ABI_DEFAULT,
-   [CPU_LOONGARCH64]	  = STR_CPU_LOONGARCH64,
-   [CPU_LA464]		  = STR_CPU_LA464,
-+  [CPU_LA664]		  = STR_CPU_LA664,
- };
- 
- struct loongarch_isa
-@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
-       .fpu = ISA_EXT_FPU64,
-       .simd = ISA_EXT_SIMD_LASX,
-   },
-+  [CPU_LA664] = {
-+      .base = ISA_BASE_LA64V110,
-+      .fpu = ISA_EXT_FPU64,
-+      .simd = ISA_EXT_SIMD_LASX,
-+  },
- };
- 
- struct loongarch_cache
-@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
-       .l2d_size = 256,
-       .simultaneous_prefetches = 4,
-   },
-+  [CPU_LA664] = {
-+      .l1d_line_size = 64,
-+      .l1d_size = 64,
-+      .l2d_size = 256,
-+      .simultaneous_prefetches = 4,
-+  },
- };
- 
- struct loongarch_align
-@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
-     .function = "32",
-     .label = "16",
-   },
-+  [CPU_LA664] = {
-+    .function = "32",
-+    .label = "16",
-+  },
- };
- 
- 
-@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
-   [CPU_LA464] = {
-       DEFAULT_COSTS
-   },
-+  [CPU_LA664] = {
-+      DEFAULT_COSTS
-+  },
- };
- 
- /* RTX costs to use when optimizing for size.  */
-@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
-   [CPU_NATIVE]	      = 4,
-   [CPU_LOONGARCH64]   = 4,
-   [CPU_LA464]	      = 4,
-+  [CPU_LA664]	      = 6,
- };

_service:tar_scm:LoongArch-Fix-internal-error-running-gcc-march-nativ.patch Deleted

@@ -1,106 +0,0 @@
-From 56752a6bbfb3d3501d0899b23020c3e2eb58882c Mon Sep 17 00:00:00 2001
-From: Xi Ruoyao <xry111@xry111.site>
-Date: Fri, 17 Nov 2023 20:44:17 +0800
-Subject: [PATCH] LoongArch: Fix internal error running "gcc -march=native" on
- LA664
-
-On LA664, the PRID preset is ISA_BASE_LA64V110 but the base architecture
-is guessed ISA_BASE_LA64V100.  This causes a warning to be outputed:
-
-    cc1: warning: base architecture 'la64' differs from PRID preset '?'
-
-But we've not set the "?" above in loongarch_isa_base_strings, thus it's
-a nullptr and then an ICE is triggered.
-
-Add ISA_BASE_LA64V110 to genopts and initialize
-loongarch_isa_base_strings[ISA_BASE_LA64V110] correctly to fix the ICE.
-The warning itself will be fixed later.
-
-gcc/ChangeLog:
-
-	* config/loongarch/genopts/loongarch-strings:
-	(STR_ISA_BASE_LA64V110): Add.
-	* config/loongarch/genopts/loongarch.opt.in:
-	(ISA_BASE_LA64V110): Add.
-	* config/loongarch/loongarch-def.c
-	(loongarch_isa_base_strings): Initialize ISA_BASE_LA64V110
-	to STR_ISA_BASE_LA64V110.
-	* config/loongarch/loongarch.opt: Regenerate.
-	* config/loongarch/loongarch-str.h: Regenerate.
-
-Signed-off-by: ticat_fp <fanpeng@loongson.cn>
----
- gcc/config/loongarch/genopts/loongarch-strings | 1 +
- gcc/config/loongarch/genopts/loongarch.opt.in  | 3 +++
- gcc/config/loongarch/loongarch-def.c           | 1 +
- gcc/config/loongarch/loongarch-str.h           | 1 +
- gcc/config/loongarch/loongarch.opt             | 3 +++
- 5 files changed, 9 insertions(+)
-
-diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
-index 7bc4824007e..b2070c83ed0 100644
---- a/gcc/config/loongarch/genopts/loongarch-strings
-+++ b/gcc/config/loongarch/genopts/loongarch-strings
-@@ -30,6 +30,7 @@ STR_CPU_LA664	      la664
- 
- # Base architecture
- STR_ISA_BASE_LA64V100 la64
-+STR_ISA_BASE_LA64V110 la64v1.1
- 
- # -mfpu
- OPTSTR_ISA_EXT_FPU    fpu
-diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
-index 00b4733d75b..b274b3fb21e 100644
---- a/gcc/config/loongarch/genopts/loongarch.opt.in
-+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
-@@ -32,6 +32,9 @@ Basic ISAs of LoongArch:
- EnumValue
- Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100)
- 
-+EnumValue
-+Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110)
-+
- ;; ISA extensions / adjustments
- Enum
- Name(isa_ext_fpu) Type(int)
-diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
-index 067629141b6..f22d488acb2 100644
---- a/gcc/config/loongarch/loongarch-def.c
-+++ b/gcc/config/loongarch/loongarch-def.c
-@@ -165,6 +165,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
- const char*
- loongarch_isa_base_strings[N_ISA_BASE_TYPES] = {
-   [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100,
-+  [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110,
- };
- 
- const char*
-diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
-index fc4f41bfc1e..114dbc692d7 100644
---- a/gcc/config/loongarch/loongarch-str.h
-+++ b/gcc/config/loongarch/loongarch-str.h
-@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
- #define STR_CPU_LA664 "la664"
- 
- #define STR_ISA_BASE_LA64V100 "la64"
-+#define STR_ISA_BASE_LA64V110 "la64v1.1"
- 
- #define OPTSTR_ISA_EXT_FPU "fpu"
- #define STR_NONE "none"
-diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
-index 7f129e53ba5..350ca30d232 100644
---- a/gcc/config/loongarch/loongarch.opt
-+++ b/gcc/config/loongarch/loongarch.opt
-@@ -39,6 +39,9 @@ Basic ISAs of LoongArch:
- EnumValue
- Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100)
- 
-+EnumValue
-+Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110)
-+
- ;; ISA extensions / adjustments
- Enum
- Name(isa_ext_fpu) Type(int)
--- 
-2.33.0
-

_service:tar_scm:LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch Deleted

@@ -1,907 +0,0 @@
-From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001
-From: Jiahao Xu <xujiahao@loongson.cn>
-Date: Wed, 29 Nov 2023 11:16:59 +0800
-Subject: [PATCH] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on
- LA664 [PR112611]
-
-For xvshuf instructions, if the index value in the selector exceeds 63, it triggers
-undefined behavior on LA464, but not on LA664. To ensure compatibility of these two
-tests on both LA464 and LA664, we have modified both tests to ensure that the index
-value in the selector does not exceed 63.
-
-gcc/testsuite/ChangeLog:
-
-	PR target/112611
-	* gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64.
-	* gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
-
-Signed-off-by: ticat_fp <fanpeng@loongson.cn>
----
- .../loongarch/vector/lasx/lasx-xvshuf_b.c     | 343 ++++++------------
- .../loongarch/vector/lsx/lsx-vshuf.c          | 162 +++------
- 2 files changed, 164 insertions(+), 341 deletions(-)
-
-diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
-index d8a29dbd225..b8ab387118a 100644
---- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
-+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
-@@ -43,9 +43,9 @@ main ()
-   *((unsigned long *)&__m256i_op1[1]) = 0xfffffefefffffefe;
-   *((unsigned long *)&__m256i_op1[0]) = 0xfffffefefffffefe;
-   *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op2[2]) = 0xfffffff8fffffff8;
-+  *((unsigned long *)&__m256i_op2[2]) = 0x3f3f3f383f3f3f38;
-   *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op2[0]) = 0xfffffff8fc000000;
-+  *((unsigned long *)&__m256i_op2[0]) = 0x3f3f3f383c000000;
-   *((unsigned long *)&__m256i_result[3]) = 0xfafafafafafafafa;
-   *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_result[1]) = 0xfefefefefefefefe;
-@@ -137,33 +137,14 @@ main ()
-   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_op10) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op23) = 0x0000ffffffffffff;
--  *((unsigned long *)&__m256i_op22) = 0x0000ffff0000ffff;
--  *((unsigned long *)&__m256i_op21) = 0x0000ffffffffffff;
--  *((unsigned long *)&__m256i_op20) = 0x0000ffff0000ffff;
-+  *((unsigned long *)&__m256i_op23) = 0x0000111111111111;
-+  *((unsigned long *)&__m256i_op22) = 0x0000222200002222;
-+  *((unsigned long *)&__m256i_op21) = 0x0000111111111111;
-+  *((unsigned long *)&__m256i_op20) = 0x0000222200002222;
-   *((unsigned long *)&__m256i_result3) = 0xffff000000000000;
--  *((unsigned long *)&__m256i_result2) = 0xffff0000ffff0000;
-+  *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_result1) = 0xffff000000000000;
--  *((unsigned long *)&__m256i_result0) = 0xffff0000ffff0000;
--  __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
--  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
--
--  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op23) = 0x000000000000ffff;
--  *((unsigned long *)&__m256i_op22) = 0x000000000000ffff;
--  *((unsigned long *)&__m256i_op21) = 0x000000000000ffff;
--  *((unsigned long *)&__m256i_op20) = 0x000000000000ffff;
--  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
-+  *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
-   __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
-   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
- 
-@@ -176,7 +157,7 @@ main ()
-   *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_op23) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op22) = 0x0000000000077fff;
-+  *((unsigned long *)&__m256i_op22) = 0x0000000000032f1f;
-   *((unsigned long *)&__m256i_op21) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_op20) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
-@@ -186,9 +167,9 @@ main ()
-   __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
-   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
- 
--  *((unsigned long *)&__m256i_op03) = 0xfffffffffffffefe;
--  *((unsigned long *)&__m256i_op02) = 0x0000000000000101;
--  *((unsigned long *)&__m256i_op01) = 0xfffffffffffffefe;
-+  *((unsigned long *)&__m256i_op03) = 0x0011001100110011;
-+  *((unsigned long *)&__m256i_op02) = 0x0000000000000001;
-+  *((unsigned long *)&__m256i_op01) = 0x0011001100110011;
-   *((unsigned long *)&__m256i_op00) = 0x0000000000000101;
-   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op12) = 0x67eee33567eee435;
-@@ -198,35 +179,16 @@ main ()
-   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op21) = 0x00000000ffffffff;
-   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
-+  *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
-+  *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
-   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
-   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
- 
--  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
-+  *((unsigned long *)&__m256i_op03) = 0x0022002200000000;
-   *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op11) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op10) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op23) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op22) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op21) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op20) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
--  __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
--  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
--
--  *((unsigned long *)&__m256i_op03) = 0xffffffff80000000;
--  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op01) = 0xffffffff80000000;
-+  *((unsigned long *)&__m256i_op01) = 0x001f001f00000000;
-   *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
-   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
-@@ -243,10 +205,10 @@ main ()
-   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
-   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
- 
--  *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op01) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff;
-+  *((unsigned long *)&__m256i_op03) = 0x0011001100110011;
-+  *((unsigned long *)&__m256i_op02) = 0x0011001100110011;
-+  *((unsigned long *)&__m256i_op01) = 0x0011001100110011;
-+  *((unsigned long *)&__m256i_op00) = 0x0011001100110011;
-   *((unsigned long *)&__m256i_op13) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op12) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op11) = 0xffffffffffffffff;
-@@ -255,17 +217,17 @@ main ()
-   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
-+  *((unsigned long *)&__m256i_result3) = 0xffffffffffffffff;
-+  *((unsigned long *)&__m256i_result2) = 0xffffffffffffffff;
-+  *((unsigned long *)&__m256i_result1) = 0xffffffffffffffff;
-+  *((unsigned long *)&__m256i_result0) = 0xffffffffffffffff;
-   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
-   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
- 
--  *((unsigned long *)&__m256i_op03) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op02) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op01) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_op00) = 0xffffffffffffffff;
-+  *((unsigned long *)&__m256i_op03) = 0x003f003f003f003f;
-+  *((unsigned long *)&__m256i_op02) = 0x003f003f003f003f;
-+  *((unsigned long *)&__m256i_op01) = 0x003f003f003f003f;
-+  *((unsigned long *)&__m256i_op00) = 0x003f003f003f003f;
-   *((unsigned long *)&__m256i_op13) = 0xefdfefdf00000000;
-   *((unsigned long *)&__m256i_op12) = 0xefdfefdfefdfefdf;
-   *((unsigned long *)&__m256i_op11) = 0xefdfefdf00000000;
-@@ -274,36 +236,17 @@ main ()
-   *((unsigned long *)&__m256i_op22) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op21) = 0xffffffffffffffff;
-   *((unsigned long *)&__m256i_op20) = 0xffffffffffffffff;
--  *((unsigned long *)&__m256i_result3) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result2) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result1) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_result0) = 0x0000000000000000;
--  __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
--  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
--
--  *((unsigned long *)&__m256i_op03) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op02) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op01) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op00) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op13) = 0x0000000000000000;
--  *((unsigned long *)&__m256i_op12) = 0x0000000000000000;

_service:tar_scm:LoongArch-Use-finer-grained-DBAR-hints.patch Deleted

@@ -1,137 +0,0 @@
-From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001
-From: Xi Ruoyao <xry111@xry111.site>
-Date: Tue, 14 Nov 2023 05:32:38 +0800
-Subject: [PATCH] LoongArch: Use finer-grained DBAR hints
-
-LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]:
-
-- Bit 4: kind of constraint (0: completion, 1: ordering)
-- Bit 3: barrier for previous read (0: true, 1: false)
-- Bit 2: barrier for previous write (0: true, 1: false)
-- Bit 1: barrier for succeeding read (0: true, 1: false)
-- Bit 0: barrier for succeeding write (0: true, 1: false)
-
-LLVM has already utilized them for different memory orders [3]:
-
-- Bit 4 is always set to one because it's only intended to be zero for
-  things like MMIO devices, which are out of the scope of memory orders.
-- An acquire barrier is used to implement acquire loads like
-
-    ld.d $a1, $t0, 0
-    dbar acquire_hint
-
-  where the load operation (ld.d) should not be reordered with any load
-  or store operation after the acquire load.  To accomplish this
-  constraint, we need to prevent the load operation from being reordered
-  after the barrier, and also prevent any following load/store operation
-  from being reordered before the barrier.  Thus bits 0, 1, and 3 must
-  be zero, and bit 2 can be one, so acquire_hint should be 0b10100.
-- An release barrier is used to implement release stores like
-
-    dbar release_hint
-    st.d $a1, $t0, 0
-
-  where the store operation (st.d) should not be reordered with any load
-  or store operation before the release store.  So we need to prevent
-  the store operation from being reordered before the barrier, and also
-  prevent any preceding load/store operation from being reordered after
-  the barrier.  So bits 0, 2, 3 must be zero, and bit 1 can be one.  So
-  release_hint should be 0b10010.
-
-A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux
-kernel [1], and LoongArch LLVM [3].  So the mapping should be correct.
-And I've also bootstrapped & regtested GCC on a LA664 with this patch.
-
-The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can
-unconditionally emit the new hints without a compiler switch.
-
-[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
-[2]: https://github.com/loongson-community/docs/pull/12
-[3]: https://github.com/llvm/llvm-project/pull/68787
-[4]: https://gcc.gnu.org/r14-406
-
-gcc/ChangeLog:
-
-	* config/loongarch/sync.md (mem_thread_fence): Remove redundant
-	check.
-	(mem_thread_fence_1): Emit finer-grained DBAR hints for
-	different memory models, instead of 0.
-
-Signed-off-by: ticat_fp <fanpeng@loongson.cn>
----
- gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++-------
- 1 file changed, 42 insertions(+), 9 deletions(-)
-
-diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
-index 9924d522bcd..1ad0c63e0d9 100644
---- a/gcc/config/loongarch/sync.md
-+++ b/gcc/config/loongarch/sync.md
-@@ -50,23 +50,56 @@
-   [(match_operand:SI 0 "const_int_operand" "")] ;; model
-   ""
- {
--  if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
--    {
--      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
--      MEM_VOLATILE_P (mem) = 1;
--      emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
--    }
-+  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-+  MEM_VOLATILE_P (mem) = 1;
-+  emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
-+
-   DONE;
- })
- 
--;; Until the LoongArch memory model (hence its mapping from C++) is finalized,
--;; conservatively emit a full FENCE.
-+;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
-+;; the Linux patch revealing it [1]:
-+;;
-+;; - Bit 4: kind of constraint (0: completion, 1: ordering)
-+;; - Bit 3: barrier for previous read (0: true, 1: false)
-+;; - Bit 2: barrier for previous write (0: true, 1: false)
-+;; - Bit 1: barrier for succeeding read (0: true, 1: false)
-+;; - Bit 0: barrier for succeeding write (0: true, 1: false)
-+;;
-+;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
-+;;
-+;; Implementations without support for the finer-granularity hints simply treat
-+;; all as the full barrier (DBAR 0), so we can unconditionally start emiting the
-+;; more precise hints right away.
- (define_insn "mem_thread_fence_1"
-   [(set (match_operand:BLK 0 "" "")
- 	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
-    (match_operand:SI 1 "const_int_operand" "")] ;; model
-   ""
---  "dbar\t0")
-+  {
-+    enum memmodel model = memmodel_base (INTVAL (operands[1]));
-+
-+    switch (model)
-+      {
-+      case MEMMODEL_ACQUIRE:
-+	return "dbar\t0b10100";
-+      case MEMMODEL_RELEASE:
-+	return "dbar\t0b10010";
-+      case MEMMODEL_ACQ_REL:
-+      case MEMMODEL_SEQ_CST:
-+	return "dbar\t0b10000";
-+      default:
-+	/* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions
-+	   need to be issued and this expansion is not invoked."
-+
-+	   __atomic builtins doc: "Consume is implemented using the
-+	   stronger acquire memory order because of a deficiency in C++11's
-+	   semantics."  See PR 59448 and get_memmodel in builtins.cc.
-+
-+	   Other values should not be returned by memmodel_base.  */
-+	gcc_unreachable ();
-+      }
-+  })
- 
- ;; Atomic memory operations.
- 
--- 
-2.33.0
-