openEuler:24.03:SP1:Everything / gcc

We truncated the diff of some files because they were too big.
Changes of Revision 7
View file
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 32
+%global gcc_release 45
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
 
@@ -69,12 +69,8 @@
 %global multilib_32_arch i686
 %endif
 %ifarch riscv64
-%global _lib lib
 %global _smp_mflags -j8
 %endif
-%ifarch loongarch64
-%global _lib lib
-%endif
 
 %global isl_enable 0
 %global check_enable 0
@@ -206,27 +202,218 @@
 Patch94: 0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch
 Patch95: 0095-STABS-remove-gstabs-and-gxcoff-functionality.patch
 Patch96: 0096-Bugfix-Autofdo-use-PMU-sampling-set-num-eauals-den.patch
-Patch97: 0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
-Patch98: 0098-aarch64-Avoid-a-use-of-callee-offset.patch
-Patch99: 0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
-Patch100: 0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
-Patch101: 0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
-Patch102: 0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
-Patch103: 0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
-Patch104: 0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
-Patch105: 0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
-Patch106: 0106-aarch64-Tweak-frame-size-comment.patch
-Patch107: 0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
-Patch108: 0108-aarch64-Simplify-top-of-frame-allocation.patch
-Patch109: 0109-aarch64-Minor-initial-adjustment-tweak.patch
-Patch110: 0110-aarch64-Tweak-stack-clash-boundary-condition.patch
-Patch111: 0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
-Patch112: 0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
-Patch113: 0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
-Patch114: 0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
-Patch115: 0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
-Patch116: 0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
-Patch117: 0117-aarch64-Fix-loose-ldpstp-check.patch
+Patch97: 0097-Improve-non-loop-disambiguation.patch
+Patch98: 0098-CHREC-multiplication-and-undefined-overflow.patch
+Patch99: 0099-Enable-Transposed-SLP.patch
+Patch100: 0100-Add-hip09-machine-discribtion.patch
+Patch101: 0101-Add-hip11-CPU-pipeline-scheduling.patch
+Patch102: 0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
+Patch103: 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
+Patch104: 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
+Patch105: 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
+Patch106: 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
+Patch107: 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
+Patch108: 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
+Patch109: 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
+Patch110: 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
+Patch111: 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
+Patch112: 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
+Patch113: 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
+Patch114: 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
+Patch115: 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
+Patch116: 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
+Patch117: 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
+Patch118: 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
+Patch119: 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
+Patch120: 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
+Patch121: 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
+Patch122: 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
+Patch123: 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
+Patch124: 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
+Patch125: 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
+Patch126: 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
+Patch127: 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
+Patch128: 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
+Patch129: 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
+Patch130: 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
+Patch131: 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
+Patch132: 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
+Patch133: 0133-Backport-SME-mode-switching-Add-note-problem.patch
+Patch134: 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
+Patch135: 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
+Patch136: 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
+Patch137: 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
+Patch138: 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
+Patch139: 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
+Patch140: 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
+Patch141: 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
+Patch142: 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
+Patch143: 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
+Patch144: 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
+Patch145: 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
+Patch146: 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
+Patch147: 0147-Backport-SME-function-Change-return-type-of-predicat.patch
+Patch148: 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
+Patch149: 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
+Patch150: 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
+Patch151: 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
+Patch152: 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
+Patch153: 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
+Patch154: 0154-Backport-SME-recog-Support-space-in-cons.patch
+Patch155: 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
+Patch156: 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
+Patch157: 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
+Patch158: 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
+Patch159: 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
+Patch160: 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
+Patch161: 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
+Patch162: 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
+Patch163: 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
+Patch164: 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
+Patch165: 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
+Patch166: 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
+Patch167: 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
+Patch168: 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
+Patch169: 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
+Patch170: 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
+Patch171: 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
+Patch172: 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
+Patch173: 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
+Patch174: 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
+Patch175: 0175-Backport-SME-aarch64-Add-sme.patch
+Patch176: 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
+Patch177: 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
+Patch178: 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
+Patch179: 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
+Patch180: 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
+Patch181: 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
+Patch182: 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
+Patch183: 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
+Patch184: 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
+Patch185: 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
+Patch186: 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
+Patch187: 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
+Patch188: 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
+Patch189: 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
+Patch190: 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
+Patch191: 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
+Patch192: 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
+Patch193: 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
+Patch194: 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
+Patch195: 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
+Patch196: 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
+Patch197: 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
+Patch198: 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
+Patch199: 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
+Patch200: 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
+Patch201: 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
+Patch202: 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
+Patch203: 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
+Patch204: 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
+Patch205: 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
+Patch206: 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
+Patch207: 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
+Patch208: 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
+Patch209: 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
+Patch210: 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
+Patch211: 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
+Patch212: 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
+Patch213: 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
+Patch214: 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
+Patch215: 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
+Patch216: 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
+Patch217: 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
+Patch218: 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
+Patch219: 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
+Patch220: 0220-Backport-SME-libgcc-Fix-config.in.patch
+Patch221: 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
+Patch222: 0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
+Patch223: 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
+Patch224: 0224-Backport-SME-aarch64-Add-V1DI-mode.patch
+Patch225: 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
+Patch226: 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
+Patch227: 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
+Patch228: 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
+Patch229: 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
+Patch230: 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
+Patch231: 0231-SME-Add-missing-header-file-in-aarch64.cc.patch
+Patch232: 0232-Backport-SME-c-Add-support-for-__extension__.patch
+Patch233: 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
+Patch234: 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
+Patch235: 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
+Patch236: 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
+Patch237: 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
+Patch238: 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
+Patch239: 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
+Patch240: 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
+Patch241: 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
+Patch242: 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
+Patch243: 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
+Patch244: 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
+Patch245: 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
+Patch246: 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
+Patch247: 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
+Patch248: 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
+Patch249: 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
View file
_service:tar_scm:0001-LoongArch-Reimplement-multilib-build-option-handling.patch
Added
@@ -0,0 +1,464 @@
+From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 13 Sep 2023 17:52:14 +0800
+Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option
+ handling.
+
+Library build options from --with-multilib-list used to be processed with
+*self_spec, which missed the driver's initial canonicalization.  This
+caused limitations on CFLAGS override and the use of driver-only options
+like -mno-lsx.
+
+The problem is solved by promoting the injection rules of --with-multilib-list
+options to the first element of DRIVER_SELF_SPECS, to make them execute before
+the canonialization.  The library-build options are also hard-coded in
+the driver and can be used conveniently by the builders of other non-gcc
+libraries via the use of -fmultiflags.
+
+Bootstrapped and tested on loongarch64-linux-gnu.
+
+ChangeLog:
+
+    * config-ml.in: Remove unneeded loongarch clause.
+    * configure.ac: Register custom makefile fragments mt-loongarch-*
+    for loongarch targets.
+    * configure: Regenerate.
+
+config/ChangeLog:
+
+    * mt-loongarch-mlib: New file.  Pass -fmultiflags when building
+    target libraries (FLAGS_FOR_TARGET).
+    * mt-loongarch-elf: New file.
+    * mt-loongarch-gnu: New file.
+
+gcc/ChangeLog:
+
+    * config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG.
+    * config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS
+    before the driver canonicalization routines.
+    * config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc.
+    to loongarch-driver.h
+    * config/loongarch/t-linux: Move multilib-related definitions to
+    t-multilib.
+    * config/loongarch/t-multilib: New file.  Inject library build
+    options obtained from --with-multilib-list.
+    * config/loongarch/t-loongarch: Same.
+---
+ config-ml.in                            | 10 ----
+ config/mt-loongarch-elf                 |  1 +
+ config/mt-loongarch-gnu                 |  2 +
+ config/mt-loongarch-mlib                |  1 +
+ configure                               |  6 +++
+ configure.ac                            |  6 +++
+ gcc/config.gcc                          |  6 +--
+ gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++
+ gcc/config/loongarch/loongarch.h        | 50 ------------------
+ gcc/config/loongarch/t-linux            | 66 +++---------------------
+ gcc/config/loongarch/t-loongarch        |  2 +-
+ gcc/config/loongarch/t-multilib         | 68 +++++++++++++++++++++++++
+ 12 files changed, 137 insertions(+), 123 deletions(-)
+ create mode 100644 config/mt-loongarch-elf
+ create mode 100644 config/mt-loongarch-gnu
+ create mode 100644 config/mt-loongarch-mlib
+ create mode 100644 gcc/config/loongarch/t-multilib
+
+diff --git a/config-ml.in b/config-ml.in
+index ad0db0781..68854a4f1 100644
+--- a/config-ml.in
++++ b/config-ml.in
+@@ -301,16 +301,6 @@ arm-*-*)
+ 	  done
+ 	fi
+ 	;;
+-loongarch*-*)
+-	old_multidirs="${multidirs}"
+-	multidirs=""
+-	for x in ${old_multidirs}; do
+-	  case "$x" in
+-	  `${CC-gcc} --print-multi-directory`) : ;;
+-	  *) multidirs="${multidirs} ${x}" ;;
+-	  esac
+-	done
+-	;;
+ m68*-*-*)
+ 	if [ x$enable_softfloat = xno ]
+ 	then
+diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf
+new file mode 100644
+index 000000000..bbf29bb57
+--- /dev/null
++++ b/config/mt-loongarch-elf
+@@ -0,0 +1 @@
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu
+new file mode 100644
+index 000000000..dfefb44ed
+--- /dev/null
++++ b/config/mt-loongarch-gnu
+@@ -0,0 +1,2 @@
++include $(srcdir)/config/mt-gnu
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib
+new file mode 100644
+index 000000000..4cfe568f1
+--- /dev/null
++++ b/config/mt-loongarch-mlib
+@@ -0,0 +1 @@
++FLAGS_FOR_TARGET += -fmultiflags
+diff --git a/configure b/configure
+index aff62c464..81b4a3cec 100755
+--- a/configure
++++ b/configure
+@@ -9548,6 +9548,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/configure.ac b/configure.ac
+index f310d75ca..9f8dbd319 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -2729,6 +2729,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3f870e966..e34a5fbb9 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2510,7 +2510,7 @@ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
+ 	extra_options="${extra_options} linux-android.opt"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -2522,7 +2522,7 @@ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -5241,7 +5241,7 @@ case "${target}" in
+ 		loongarch_multilib_list_sane=no
+
+ 		# This one goes to TM_MULTILIB_CONFIG, for use in t-linux.
+-		loongarch_multilib_list_make=""
++		loongarch_multilib_list_make="${abi_base},"
+
+ 		# This one goes to tm_defines, for use in loongarch-driver.c.
+ 		loongarch_multilib_list_c=""
+diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
+index 6cfe0efb5..e7d083677 100644
+--- a/gcc/config/loongarch/loongarch-driver.h
++++ b/gcc/config/loongarch/loongarch-driver.h
+@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3.  If not see
+
+ #include "loongarch-str.h"
+
++#ifndef SUBTARGET_CPP_SPEC
++#define SUBTARGET_CPP_SPEC ""
++#endif
++
++#ifndef SUBTARGET_CC1_SPEC
++#define SUBTARGET_CC1_SPEC ""
++#endif
++
++#ifndef SUBTARGET_ASM_SPEC
++#define SUBTARGET_ASM_SPEC ""
++#endif
++
++#define EXTRA_SPECS \
++  {"early_self_spec", ""}, \
++  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
++  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
++  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
++
++
View file
_service:tar_scm:0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
Added
@@ -0,0 +1,192 @@
+From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 15 Sep 2023 10:22:49 +0800
+Subject: [PATCH 002/188] LoongArch: Check whether binutils supports the relax
+ function.  If supported, explicit relocs are turned off by default.
+
+gcc/ChangeLog:
+
+    * config.in: Regenerate.
+    * config/loongarch/genopts/loongarch.opt.in: Add compilation option
+    mrelax. And set the initial value of explicit-relocs according to the
+    detection status.
+    * config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
+    --no-relax option to the linker.
+    * config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
+    -mno-relax, pass the -mno-relax option to the assembler.
+    * config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro.
+    * config/loongarch/loongarch.opt: Regenerate.
+    * configure: Regenerate.
+    * configure.ac: Add detection of support for binutils relax function.
+---
+ gcc/config.in                                 |  6 ++++
+ gcc/config/loongarch/genopts/loongarch.opt.in |  7 ++++-
+ gcc/config/loongarch/gnu-user.h               |  3 +-
+ gcc/config/loongarch/loongarch-driver.h       |  2 +-
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++
+ gcc/config/loongarch/loongarch.opt            |  7 ++++-
+ gcc/configure                                 | 31 +++++++++++++++++++
+ gcc/configure.ac                              |  4 +++
+ 8 files changed, 60 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 0dff36199..0c55e67e7 100644
+--- a/gcc/config.in
++++ b/gcc/config.in
+@@ -637,6 +637,12 @@
+ #endif
+
+
++/* Define if your assembler supports -mrelax option. */
++#ifndef USED_FOR_TARGET
++#undef HAVE_AS_MRELAX_OPTION
++#endif
++
++
+ /* Define if your assembler supports .mspabi_attribute. */
+ #ifndef USED_FOR_TARGET
+ #undef HAVE_AS_MSPABI_ATTRIBUTE
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index 2ef1b1e3b..f18733c24 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
+ -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
+
+ mexplicit-relocs
+-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
+ Use %reloc() assembly operators.
+
+ ; The code model option names for -mcmodel.
+@@ -214,3 +214,8 @@ Specify the code model.
+ mdirect-extern-access
+ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
+ Avoid using the GOT to access external symbols.
++
++mrelax
++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
++Take advantage of linker relaxations to reduce the number of instructions
++required to materialize symbol addresses.
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 44e4f2575..60ef75601 100644
+--- a/gcc/config/loongarch/gnu-user.h
++++ b/gcc/config/loongarch/gnu-user.h
+@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3.  If not see
+   "%{!shared: %{static} "                                       \
+   "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} "      \
+   "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} "              \
+-  "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
++  "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \
++  "%{mno-relax: --no-relax}"
+
+
+ /* Similar to standard Linux, but adding -ffast-math support. */
+diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
+index e7d083677..59fa3263d 100644
+--- a/gcc/config/loongarch/loongarch-driver.h
++++ b/gcc/config/loongarch/loongarch-driver.h
+@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3.  If not see
+
+ #undef ASM_SPEC
+ #define ASM_SPEC \
+-  "%{mabi=*} %(subtarget_asm_spec)"
++  "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
+
+
+ extern const char*
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index 624e246bb..f2b59abe6 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+ #define HAVE_AS_EXPLICIT_RELOCS 0
+ #endif
+
++#ifndef HAVE_AS_MRELAX_OPTION
++#define HAVE_AS_MRELAX_OPTION 0
++#endif
++
+ #endif /* LOONGARCH_OPTS_H */
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index f2d21c9f3..78f2baf3a 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
+ -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
+
+ mexplicit-relocs
+-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
+ Use %reloc() assembly operators.
+
+ ; The code model option names for -mcmodel.
+@@ -221,3 +221,8 @@ Specify the code model.
+ mdirect-extern-access
+ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
+ Avoid using the GOT to access external symbols.
++
++mrelax
++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
++Take advantage of linker relaxations to reduce the number of instructions
++required to materialize symbol addresses.
+diff --git a/gcc/configure b/gcc/configure
+index 2a5d3aaf3..8ae8a924a 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
+
+ $as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
+
++fi
++
++  { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5
++$as_echo_n "checking assembler for -mrelax option... " >&6; }
++if ${gcc_cv_as_loongarch_relax+:} false; then :
++  $as_echo_n "(cached) " >&6
++else
++  gcc_cv_as_loongarch_relax=no
++  if test x$gcc_cv_as != x; then
++    $as_echo '.text' > conftest.s
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++	gcc_cv_as_loongarch_relax=yes
++    else
++      echo "configure: failed program was" >&5
++      cat conftest.s >&5
++    fi
++    rm -f conftest.o conftest.s
++  fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5
++$as_echo "$gcc_cv_as_loongarch_relax" >&6; }
++if test $gcc_cv_as_loongarch_relax = yes; then
++
++$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
++
+ fi
+
+ ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index ba2bf1ffc..f7161e66e 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5322,6 +5322,10 @@ x:
+     .cfi_endproc],,
+     [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
+       [Define if your assembler supports eh_frame pcrel encoding.])])
++  gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax,
++    [-mrelax], [.text],,
++    [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
++      [Define if your assembler supports -mrelax option.])])
+   ;;
+ s390*-*-*)
+   gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
+--
+2.43.0
+
View file
_service:tar_scm:0003-Modify-gas-uleb128-support-test.patch
Added
@@ -0,0 +1,115 @@
+From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001
+From: mengqinggang <mengqinggang@loongson.cn>
+Date: Fri, 15 Sep 2023 12:04:04 +0800
+Subject: [PATCH 003/188] Modify gas uleb128 support test
+
+Some assemblers (GNU as for LoongArch) generates relocations for leb128
+symbol arithmetic for relaxation, we need to disable relaxation probing
+leb128 support then.
+
+gcc/ChangeLog:
+
+    * configure: Regenerate.
+    * configure.ac: Checking assembler for -mno-relax support.
+    Disable relaxation when probing leb128 support.
+
+co-authored-by: Xi Ruoyao <xry111@xry111.site>
+---
+ gcc/configure    | 42 +++++++++++++++++++++++++++++++++++++++++-
+ gcc/configure.ac | 17 ++++++++++++++++-
+ 2 files changed, 57 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index 8ae8a924a..430d44dc3 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -24441,6 +24441,46 @@ _ACEOF
+
+
+
++# Some assemblers (GNU as for LoongArch) generates relocations for
++# leb128 symbol arithmetic for relaxation, we need to disable relaxation
++# probing leb128 support then.
++case $target in
++  loongarch*-*-*)
++    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5
++$as_echo_n "checking assembler for -mno-relax support... " >&6; }
++if ${gcc_cv_as_mno_relax+:} false; then :
++  $as_echo_n "(cached) " >&6
++else
++  gcc_cv_as_mno_relax=no
++  if test x$gcc_cv_as != x; then
++    $as_echo '.text' > conftest.s
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++	gcc_cv_as_mno_relax=yes
++    else
++      echo "configure: failed program was" >&5
++      cat conftest.s >&5
++    fi
++    rm -f conftest.o conftest.s
++  fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5
++$as_echo "$gcc_cv_as_mno_relax" >&6; }
++if test $gcc_cv_as_mno_relax = yes; then
++  check_leb128_asflags=-mno-relax
++fi
++
++    ;;
++  *)
++    check_leb128_asflags=
++    ;;
++esac
++
+ # Check if we have .usleb128, and support symbol arithmetic with it.
+ # Older versions of GAS and some non-GNU assemblers, have a bugs handling
+ # these directives, even when they appear to accept them.
+@@ -24459,7 +24499,7 @@ L1:
+ L2:
+ 	.uleb128 0x8000000000000000
+ ' > conftest.s
+-  if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
++  if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index f7161e66e..4b24db190 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix)
+
+ gcc_AC_INITFINI_ARRAY
+
++# Some assemblers (GNU as for LoongArch) generates relocations for
++# leb128 symbol arithmetic for relaxation, we need to disable relaxation
++# probing leb128 support then.
++case $target in
++  loongarch*-*-*)
++    gcc_GAS_CHECK_FEATURE([-mno-relax support],
++      gcc_cv_as_mno_relax,[-mno-relax],[.text],,
++      [check_leb128_asflags=-mno-relax])
++    ;;
++  *)
++    check_leb128_asflags=
++    ;;
++esac
++
+ # Check if we have .usleb128, and support symbol arithmetic with it.
+ # Older versions of GAS and some non-GNU assemblers, have a bugs handling
+ # these directives, even when they appear to accept them.
+-gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,,
++gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,
++[$check_leb128_asflags],
+ [.data
+ 	.uleb128 L2 - L1
+ L1:
+--
+2.43.0
+
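For background on what this test probes (not part of the revision): ULEB128 stores an unsigned integer in base-128 groups of 7 bits, low-order group first, with the top bit of each byte set while more groups follow. A minimal C sketch of the encoding that .uleb128 emits (illustrative only; the function name is ours):

#include <stdint.h>
#include <stddef.h>

/* Encode VALUE as unsigned LEB128 into BUF; return the number of
   bytes written.  Each output byte carries 7 payload bits; bit 7 is
   a continuation flag.  */
static size_t
encode_uleb128 (uint64_t value, uint8_t *buf)
{
  size_t n = 0;
  do
    {
      uint8_t byte = value & 0x7f;
      value >>= 7;
      if (value != 0)
        byte |= 0x80;  /* more groups follow */
      buf[n++] = byte;
    }
  while (value != 0);
  return n;
}

The constant 0x8000000000000000 in the conftest above needs the maximum ten bytes (nine 0x80 bytes followed by 0x01), making it a good stress value, while the symbol arithmetic L2 - L1 forces the assembler to either resolve the difference or emit a relocation; GNU as for LoongArch emits relocations when relaxation is enabled, hence the -mno-relax probe.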
View file
_service:tar_scm:0004-LoongArch-Optimizations-of-vector-construction.patch
Added
@@ -0,0 +1,1310 @@
+From b74895b8b723a64bc136c4c560661abed81e013a Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Thu, 21 Sep 2023 09:19:18 +0800
+Subject: [PATCH 004/188] LoongArch: Optimizations of vector construction.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md (lasx_vecinit_merge_<LASX:mode>): New
+    pattern for vector construction.
+    (vec_set<mode>_internal): Ditto.
+    (lasx_xvinsgr2vr_<mode256_i_half>_internal): Ditto.
+    (lasx_xvilvl_<lasxfmt_f>_internal): Ditto.
+    * config/loongarch/loongarch.cc (loongarch_expand_vector_init):
+    Optimized the implementation of vector construction.
+    (loongarch_expand_vector_init_same): New function.
+    * config/loongarch/lsx.md (lsx_vilvl_<lsxfmt_f>_internal): New
+    pattern for vector construction.
+    (lsx_vreplvei_mirror_<lsxfmt_f>): New pattern for vector
+    construction.
+    (vec_concatv2df): Ditto.
+    (vec_concatv4sf): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c: New test.
+    * gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  |  69 ++
+ gcc/config/loongarch/loongarch.cc             | 716 +++++++++---------
+ gcc/config/loongarch/lsx.md                   | 134 ++++
+ .../vector/lasx/lasx-vec-construct-opt.c      | 102 +++
+ .../vector/lsx/lsx-vec-construct-opt.c        |  85 +++
+ 5 files changed, 732 insertions(+), 374 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 8111c8bb7..2bc5d47ed 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -186,6 +186,9 @@
+   UNSPEC_LASX_XVLDI
+   UNSPEC_LASX_XVLDX
+   UNSPEC_LASX_XVSTX
++  UNSPEC_LASX_VECINIT_MERGE
++  UNSPEC_LASX_VEC_SET_INTERNAL
++  UNSPEC_LASX_XVILVL_INTERNAL
+ )
+
+ ;; All vector modes with 256 bits.
+@@ -255,6 +258,15 @@
+    (V8SF "V4SF")
+    (V4DF "V2DF")])
+
++;; The attribute gives half int/float modes for vector modes.
++(define_mode_attr VHMODE256_ALL
++  [(V32QI "V16QI")
++   (V16HI "V8HI")
++   (V8SI "V4SI")
++   (V4DI "V2DI")
++   (V8SF "V4SF")
++   (V4DF "V2DF")])
++
+ ;; The attribute gives double modes for vector modes in LASX.
+ (define_mode_attr VDMODE256
+   [(V8SI "V4DI")
+@@ -312,6 +324,11 @@
+    (V4DI "v4df")
+    (V8SI "v8sf")])
+
++;; This attribute gives V32QI mode and V16HI mode with half size.
++(define_mode_attr mode256_i_half
++  [(V32QI "v16qi")
++   (V16HI "v8hi")])
++
+ ;; This attribute gives suffix for LASX instructions. HOW?
+ (define_mode_attr lasxfmt
+   [(V4DF "d")
+@@ -756,6 +773,20 @@
+   [(set_attr "type" "simd_splat")
+    (set_attr "mode" "<MODE>")])
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Support a LSX-mode input op[2].
++(define_insn "lasx_vecinit_merge_<LASX:mode>"
++  [(set (match_operand:LASX 0 "register_operand" "=f")
++	(unspec:LASX
++	  [(match_operand:LASX 1 "register_operand" "0")
++	   (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")
++	   (match_operand 3 "const_uimm8_operand")]
++	  UNSPEC_LASX_VECINIT_MERGE))]
++  "ISA_HAS_LASX"
++  "xvpermi.q\t%u0,%u2,%3"
++  [(set_attr "type" "simd_splat")
++   (set_attr "mode" "<MODE>")])
++
+ (define_insn "lasx_xvpickve2gr_d<u>"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+	(any_extend:DI
+@@ -779,6 +810,33 @@
+   DONE;
+ })
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Simulate missing instructions xvinsgr2vr.b and xvinsgr2vr.h.
++(define_expand "vec_set<mode>_internal"
++  [(match_operand:ILASX_HB 0 "register_operand")
++   (match_operand:<UNITMODE> 1 "reg_or_0_operand")
++   (match_operand 2 "const_<indeximm256>_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx index = GEN_INT (1 << INTVAL (operands[2]));
++  emit_insn (gen_lasx_xvinsgr2vr_<mode256_i_half>_internal
++	     (operands[0], operands[1], operands[0], index));
++  DONE;
++})
++
++(define_insn "lasx_xvinsgr2vr_<mode256_i_half>_internal"
++  [(set (match_operand:ILASX_HB 0 "register_operand" "=f")
++	(unspec:ILASX_HB [(match_operand:<UNITMODE> 1 "reg_or_0_operand" "rJ")
++			  (match_operand:ILASX_HB 2 "register_operand" "0")
++			  (match_operand 3 "const_<bitmask256>_operand" "")]
++			 UNSPEC_LASX_VEC_SET_INTERNAL))]
++  "ISA_HAS_LASX"
++{
++  return "vinsgr2vr.<lasxfmt>\t%w0,%z1,%y3";
++}
++  [(set_attr "type" "simd_insert")
++   (set_attr "mode" "<MODE>")])
++
+ (define_expand "vec_set<mode>"
+   [(match_operand:FLASX 0 "register_operand")
+    (match_operand:<UNITMODE> 1 "reg_or_0_operand")
+@@ -1567,6 +1625,17 @@
+   [(set_attr "type" "simd_flog2")
+    (set_attr "mode" "<MODE>")])
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Merge two scalar floating-point op[1] and op[2] into a LASX op[0].
++(define_insn "lasx_xvilvl_<lasxfmt_f>_internal"
++  [(set (match_operand:FLASX 0 "register_operand" "=f")
++	(unspec:FLASX [(match_operand:<UNITMODE> 1 "register_operand" "f")
++		       (match_operand:<UNITMODE> 2 "register_operand" "f")]
++		      UNSPEC_LASX_XVILVL_INTERNAL))]
++  "ISA_HAS_LASX"
++  "xvilvl.<lasxfmt>\t%u0,%u2,%u1"
++  [(set_attr "type" "simd_permute")
++   (set_attr "mode" "<MODE>")])
+
+ (define_insn "smax<mode>3"
+   [(set (match_operand:FLASX 0 "register_operand" "=f")
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index f2e796a6b..760b12268 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10193,300 +10193,344 @@ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ 				     ops[1])));
+ }
+
++/* Expand initialization of a vector which has all same elements.  */
++
+ void
+-loongarch_expand_vector_init (rtx target, rtx vals)
++loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
+ {
+   machine_mode vmode = GET_MODE (target);
+   machine_mode imode = GET_MODE_INNER (vmode);
+-  unsigned i, nelt = GET_MODE_NUNITS (vmode);
+-  unsigned nvar = 0;
+-  bool all_same = true;
+-  rtx x;
++  rtx same = XVECEXP (vals, 0, 0);
++  rtx temp, temp2;
+
+-  for (i = 0; i < nelt; ++i)
++  if (CONST_INT_P (same) && nvar == 0
++      && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
++    {
++      switch (vmode)
++	{
++	case E_V32QImode:
++	case E_V16HImode:
++	case E_V8SImode:
++	case E_V4DImode:
++	case E_V16QImode:
++	case E_V8HImode:
++	case E_V4SImode:
++	case E_V2DImode:
++	  temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
++	  emit_move_insn (target, temp);
++	  return;
++	default:
++	  gcc_unreachable ();
++	}
++    }
++  temp = gen_reg_rtx (imode);
++  if (imode == GET_MODE (same))
++    temp2 = same;
++  else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
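To make the target of these changes concrete: loongarch_expand_vector_init runs when a vector is built from scalar elements, for example through GCC's generic vector extensions. A small illustration of such a construction (our own example, not from the patch; with -mlasx the new merge/interleave paths above can apply to cases like this):

/* Build a 256-bit vector from scalar ints; on LoongArch with -mlasx
   this construction is expanded by loongarch_expand_vector_init.  */
typedef int v8si __attribute__ ((vector_size (32)));

v8si
build_v8si (int a, int b)
{
  return (v8si) { a, b, a, b, a, b, a, b };
}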
View file
_service:tar_scm:0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
Added
@@ -0,0 +1,51 @@
+From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 2 Oct 2023 18:51:00 +0800
+Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign
+ RTL
+
+When I added copysign support for LoongArch (r13-3702), we did not have
+a copysign RTL insn, so I had to use UNSPEC to represent the copysign
+instruction.  Now the copysign RTX code has been added in r14-1586, so
+this patch removes those UNSPECs, and it uses the native RTL copysign
+insn.
+
+Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign
+RTL" [1] from Michael Meissner.
+
+[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete.
+    (copysign<mode>3): Use copysign RTL instead of UNSPEC.
+---
+ gcc/config/loongarch/loongarch.md | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 63ff32e75..73e2cbe0b 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -37,7 +37,6 @@
+   UNSPEC_FCLASS
+   UNSPEC_FMAX
+   UNSPEC_FMIN
+-  UNSPEC_FCOPYSIGN
+   UNSPEC_FTINT
+   UNSPEC_FTINTRM
+   UNSPEC_FTINTRP
+@@ -1129,9 +1128,8 @@
+
+ (define_insn "copysign<mode>3"
+   [(set (match_operand:ANYF 0 "register_operand" "=f")
+-	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+-		      (match_operand:ANYF 2 "register_operand" "f")]
+-		     UNSPEC_FCOPYSIGN))]
++	(copysign:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		       (match_operand:ANYF 2 "register_operand" "f")))]
+   "TARGET_HARD_FLOAT"
+   "fcopysign.<fmt>\t%0,%1,%2"
+   [(set_attr "type" "fcopysign")
+--
+2.43.0
+
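The semantics are unchanged by this patch: only the RTL representation of the fcopysign.{s,d} instruction moves from an UNSPEC to the native copysign RTX code. As a reminder of the C-level behaviour this pattern implements, copysign (x, y) yields the magnitude of x with the sign bit of y (our example, not from the patch):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* The sign bit is copied verbatim, signed zeros included.  */
  printf ("%g\n", copysign (3.0, -0.0));  /* prints -3 */
  printf ("%g\n", copysign (-3.0, 1.0));  /* prints 3 */
  return 0;
}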
View file
_service:tar_scm:0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
Added
@@ -0,0 +1,71 @@
+From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 11 Oct 2023 17:59:53 +0800
+Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch
+ headers.
+
+gcc/ChangeLog:
+
+    * config.gcc: Add loongarch-driver.h to tm_files.
+    * config/loongarch/loongarch.h: Do not include loongarch-driver.h.
+    * config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
+    instead of $(TM_H) for building generator programs.
+---
+ gcc/config.gcc                   | 4 ++--
+ gcc/config/loongarch/loongarch.h | 3 ---
+ gcc/config/loongarch/t-loongarch | 3 ++-
+ 3 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e34a5fbb9..11ab620d0 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*)
+
+ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	extra_options="${extra_options} linux-android.opt"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+@@ -2521,7 +2521,7 @@ loongarch*-*-linux*)
+
+ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a443a6427..a2dc4ba8c 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3.  If not see
+
+ #define TARGET_LIBGCC_SDATA_SECTION ".sdata"
+
+-/* Driver native functions for SPEC processing in the GCC driver. */
+-#include "loongarch-driver.h"
+-
+ /* This definition replaces the formerly used 'm' constraint with a
+    different constraint letter in order to avoid changing semantics of
+    the 'm' constraint when accepting new address formats in
+diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
+index 28cfb49df..12734c37b 100644
+--- a/gcc/config/loongarch/t-loongarch
++++ b/gcc/config/loongarch/t-loongarch
+@@ -16,7 +16,8 @@
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+
+-TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
++
++GTM_H += loongarch-multilib.h
+ OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
+ 		   $(srcdir)/config/loongarch/loongarch-tune.h
+
+--
+2.43.0
+
View file
_service:tar_scm:0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
Added
@@ -0,0 +1,65 @@
+From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 25 Oct 2024 00:58:01 +0000
+Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424]
+
+gcc/testsuite/ChangeLog:
+
+    PR target/111424
+    * lib/target-supports.exp: Enable vect.exp for LoongArch.
+---
+ gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index 192e0aded..bbe145c1c 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } {
+ 	}
+     } elseif [istarget amdgcn-*-*] {
+ 	set dg-do-what-default run
++    } elseif [istarget loongarch*-*-*] {
++	lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
++	if [check_effective_target_loongarch_asx_hw] {
++	    set dg-do-what-default run
++	} else {
++	    set dg-do-what-default compile
++	}
+     } else {
+ 	return 0
+     }
+@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } {
+     return 1
+ }
+
++proc check_effective_target_loongarch_sx_hw { } {
++    return [check_runtime loongarch_sx_hw {
++	#include <lsxintrin.h>
++	int main (void)
++	{
++	    __m128i a, b, c;
++	    c = __lsx_vand_v (a, b);
++	    return 0;
++	}
++    } "-mlsx"]
++}
++
++proc check_effective_target_loongarch_asx_hw { } {
++    return [check_runtime loongarch_asx_hw {
++	#include <lasxintrin.h>
++	int main (void)
++	{
++	    __m256i a, b, c;
++	    c = __lasx_xvand_v (a, b);
++	    return 0;
++	}
++    } "-mlasx"]
++}
++
+ # Return 1 if the target does *not* require strict alignment.
+
+ proc check_effective_target_non_strict_align {} {
+--
+2.43.0
+
View file
_service:tar_scm:0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
Added
@@ -0,0 +1,48 @@
+From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 15 Sep 2023 11:56:01 +0800
+Subject: [PATCH 008/188] LoongArch: Delete macro definition
+ ASM_OUTPUT_ALIGN_WITH_NOP.
+
+There are two reasons for removing this macro definition:
+1. The default in the assembler is to use the nop instruction for filling.
+2. For assembly directives: .align abs-expr, abs-expr, abs-expr
+   The third expression it is the maximum number of bytes that should be
+   skipped by this alignment directive.
+   Therefore, it will affect the display of the specified alignment rules
+   and affect the operating efficiency.
+
+This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
+(Since the assembler will add nop based on the .align information when doing relax,
+it will cause the conditional branch to go out of bounds during the assembly process.
+This submission of binutils solves this problem.)
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
+    Delete.
+
+Co-authored-by: Chenghua Xu <xuchenghua@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a2dc4ba8c..572b538be 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1058,11 +1058,6 @@ typedef struct {
+
+ #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
+-   used for padding.  */
+-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
+-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
+-
+ /* This is how to output an assembler line to advance the location
+    counter by SIZE bytes.  */
+
+--
+2.43.0
+
View file
_service:tar_scm:0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
Added
@@ -0,0 +1,105 @@
+From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Wed, 11 Oct 2023 16:41:25 +0800
+Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid
+ ICE.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Following test code triggers unrecognized insn ICE on LoongArch target
+with "-O3 -mlasx":
+
+void
+foo (unsigned char *dst, unsigned char *src)
+{
+  for (int y = 0; y < 16; y++)
+    {
+      for (int x = 0; x < 16; x++)
+	dst[x] = src[x] + 1;
+      dst += 32;
+      src += 32;
+    }
+}
+
+ICE info:
+./test.c: In function ‘foo’:
+./test.c:8:1: error: unrecognizable insn:
+    8 | }
+      | ^
+(insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ])
+        (vec_concat:V32QI (reg:V16QI 186)
+            (const_vector:V16QI [
+                (const_int 0 [0]) repeated x16
+            ]))) "./test.c":4:19 -1
+     (nil))
+during RTL pass: vregs
+./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791
+0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108
+0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116
+0x120a03c5b extract_insn(rtx_insn*)
+	/home/panchenghui/upstream/gcc/gcc/recog.cc:2791
+0x12067ff73 instantiate_virtual_regs_in_insn
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1610
+0x12067ff73 instantiate_virtual_regs
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1983
+0x12067ff73 execute
+	/home/panchenghui/upstream/gcc/gcc/function.cc:2030
+
+This RTL is generated inside loongarch_expand_vector_group_init function (related
+to vec_initv32qiv16qi template). Original impl doesn't ensure all vec_concat arguments
+are register type. This patch adds force_reg() to the vec_concat argument generation.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
+    fix impl related to vec_initv32qiv16qi template to avoid ICE.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             |  3 ++-
+ .../loongarch/vector/lasx/lasx-vec-init-1.c   | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 760b12268..9a629a999 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
+ void
+ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ {
+-  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
++  rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
++		 force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
+   emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+ 						      ops[1])));
+ }
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+new file mode 100644
+index 000000000..28be32982
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++void
++foo (unsigned char *dst, unsigned char *src)
++{
++  for (int y = 0; y < 16; y++)
++    {
++      for (int x = 0; x < 16; x++)
++	dst[x] = src[x] + 1;
++      dst += 32;
++      src += 32;
++    }
++}
+--
+2.43.0
+
View file
_service:tar_scm:0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
Added
@@ -0,0 +1,35 @@
+From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 17 Oct 2023 21:55:05 +0800
+Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for
+ zeroing a fcc
+
+During the review of an LLVM change [1], on LA464 we found that zeroing
+an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0.
+
+[1]: https://github.com/llvm/llvm-project/pull/69300
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for
+    zeroing a fcc.
+---
+ gcc/config/loongarch/loongarch.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 73e2cbe0b..5f9e63d66 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2150,7 +2150,7 @@
+   [(set (match_operand:FCC 0 "register_operand" "=z")
+	(const_int 0))]
+   ""
+-  "movgr2cf\t%0,$r0")
++  "fcmp.caf.s\t%0,$f0,$f0")
+
+ ;; Conditional move instructions.
+
+--
+2.43.0
+
View file
_service:tar_scm:0011-LoongArch-Implement-avg-and-sad-standard-names.patch
Added
@@ -0,0 +1,389 @@
+From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:36:12 +0800
+Subject: [PATCH 011/188] LoongArch:Implement avg and sad standard names.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md
+    (avg<mode>3_ceil): New patterns.
+    (uavg<mode>3_ceil): Ditto.
+    (avg<mode>3_floor): Ditto.
+    (uavg<mode>3_floor): Ditto.
+    (usadv32qi): Ditto.
+    (ssadv32qi): Ditto.
+    * config/loongarch/lsx.md
+    (avg<mode>3_ceil): New patterns.
+    (uavg<mode>3_ceil): Ditto.
+    (avg<mode>3_floor): Ditto.
+    (uavg<mode>3_floor): Ditto.
+    (usadv16qi): Ditto.
+    (ssadv16qi): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/avg-ceil-lasx.c: New test.
+    * gcc.target/loongarch/avg-ceil-lsx.c: New test.
+    * gcc.target/loongarch/avg-floor-lasx.c: New test.
+    * gcc.target/loongarch/avg-floor-lsx.c: New test.
+    * gcc.target/loongarch/sad-lasx.c: New test.
+    * gcc.target/loongarch/sad-lsx.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 78 +++++++++++++++++++
+ gcc/config/loongarch/lsx.md                   | 78 +++++++++++++++++++
+ .../gcc.target/loongarch/avg-ceil-lasx.c      | 22 ++++++
+ .../gcc.target/loongarch/avg-ceil-lsx.c       | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lasx.c     | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lsx.c      | 22 ++++++
+ gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++
+ gcc/testsuite/gcc.target/loongarch/sad-lsx.c  | 20 +++++
+ 8 files changed, 284 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 2bc5d47ed..c7496d68a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5171,3 +5171,81 @@
+ 				      const0_rtx));
+   DONE;
+ })
++
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_s_<lasxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_u_<lasxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_s_<lasxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_u_<lasxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
++
++(define_expand "ssadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 075f6ba56..b4e92ae9c 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -3581,6 +3581,84 @@
+   DONE;
+ })
+
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_s_<lsxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_u_<lsxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_s_<lsxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_u_<lsxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv16qi"
++  [(match_operand:V4SI 0 "register_operand")
++   (match_operand:V16QI 1 "register_operand")
++   (match_operand:V16QI 2 "register_operand")
++   (match_operand:V4SI 3 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  rtx t1 = gen_reg_rtx (V16QImode);
++  rtx t2 = gen_reg_rtx (V8HImode);
++  rtx t3 = gen_reg_rtx (V4SImode);
++  emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
View file
_service:tar_scm:0012-LoongArch-Implement-vec_widen-standard-names.patch
Added
@@ -0,0 +1,403 @@
+From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:39:40 +0800
+Subject: [PATCH 012/188] LoongArch: Implement vec_widen standard names.
+
+Add support for vec_widen lo/hi patterns.  These do not directly
+match on Loongarch lasx instructions but can be emulated with
+even/odd + vector merge.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md
+	(vec_widen_<su>mult_even_v8si): New patterns.
+	(vec_widen_<su>add_hi_<mode>): Ditto.
+	(vec_widen_<su>add_lo_<mode>): Ditto.
+	(vec_widen_<su>sub_hi_<mode>): Ditto.
+	(vec_widen_<su>sub_lo_<mode>): Ditto.
+	(vec_widen_<su>mult_hi_<mode>): Ditto.
+	(vec_widen_<su>mult_lo_<mode>): Ditto.
+	* config/loongarch/loongarch.md (u_bool): New iterator.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_vec_widen_hilo): New prototype.
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_vec_interleave): New function.
+	(loongarch_expand_vec_widen_hilo): New function.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-widen-add.c: New test.
+	* gcc.target/loongarch/vect-widen-mul.c: New test.
+	* gcc.target/loongarch/vect-widen-sub.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  |  82 ++++++++---
+ gcc/config/loongarch/loongarch-protos.h       |   1 +
+ gcc/config/loongarch/loongarch.cc             | 137 ++++++++++++++++++
+ gcc/config/loongarch/loongarch.md             |   2 +
+ .../gcc.target/loongarch/vect-widen-add.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-mul.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-sub.c     |  24 +++
+ 7 files changed, 277 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index c7496d68a..442fda246 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5048,23 +5048,71 @@
+   [(set_attr "type" "simd_store")
+    (set_attr "mode" "DI")])
+
+-(define_insn "vec_widen_<su>mult_even_v8si"
+-  [(set (match_operand:V4DI 0 "register_operand" "=f")
+-	(mult:V4DI
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 1 "register_operand" "%f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 2 "register_operand" "f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))))]
+-  "ISA_HAS_LASX"
+-  "xvmulwev.d.w<u>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "V4DI")])
++(define_expand "vec_widen_<su>add_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>add_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "mult");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "mult");
++  DONE;
++})
+
+ ;; Vector reduction operation
+ (define_expand "reduc_plus_scal_v4di"
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index ea61cf567..163162598 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void);
+ extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
+ extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
+						 rtx *);
++extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
+
+ /* Routines implemented in loongarch-c.c.  */
+ void loongarch_cpu_cpp_builtins (cpp_reader *);
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9a629a999..c0f58f9a9 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
+   return loongarch_expand_vec_perm_even_odd_1 (d, odd);
+ }
+
++static void
++loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
++{
++  struct expand_vec_perm_d d;
++  unsigned i, nelt, base;
++  bool ok;
++
++  d.target = target;
++  d.op0 = op0;
++  d.op1 = op1;
++  d.vmode = GET_MODE (target);
++  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
++  d.one_vector_p = false;
++  d.testing_p = false;
++
++  base = high_p ? nelt / 2 : 0;
++  for (i = 0; i < nelt / 2; ++i)
++    {
++      d.perm[i * 2] = i + base;
++      d.perm[i * 2 + 1] = i + base + nelt;
++    }
++
++  ok = loongarch_expand_vec_perm_interleave (&d);
++  gcc_assert (ok);
++}
++
++/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd
++   parts of the double sized result elements in the corresponding elements of
++   the target register.  That's NOT what the vec_widen_umult_lo/hi patterns are
++   expected to do.  We emulate the widening lo/hi multiplies with the even/odd
++   versions followed by a vector merge.  */
++
++void
++loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
++				 bool uns_p, bool high_p, const char *optab)
++{
++  machine_mode wmode = GET_MODE (dest);
++  machine_mode mode = GET_MODE (op1);
++  rtx t1, t2, t3;
++
++  t1 = gen_reg_rtx (wmode);
++  t2 = gen_reg_rtx (wmode);
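The key idea is stated in the comment above: xvmulwev/xvmulwod compute products of the even and odd source elements, while the vec_widen_*_lo/hi standard names want the widened results of the low and high input halves in element order, so an even/odd operation followed by an interleave recovers the expected layout. A scalar sketch of that recombination for 8 x int32 -> two halves of 4 x int64 (our own simplification, which ignores LASX's two-128-bit-lane register layout):

    #include <stdint.h>

    void
    widen_mult_hilo (const int32_t a[8], const int32_t b[8],
                     int64_t lo[4], int64_t hi[4])
    {
      int64_t even[4], odd[4];
      for (int i = 0; i < 4; i++)
        {
          even[i] = (int64_t) a[2 * i] * b[2 * i];          /* xvmulwev */
          odd[i]  = (int64_t) a[2 * i + 1] * b[2 * i + 1];  /* xvmulwod */
        }
      /* Interleave low halves for the _lo result, high halves for _hi.  */
      for (int i = 0; i < 2; i++)
        {
          lo[2 * i] = even[i];
          lo[2 * i + 1] = odd[i];
          hi[2 * i] = even[i + 2];
          hi[2 * i + 1] = odd[i + 2];
        }
    }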
View file
_service:tar_scm:0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
Added
@@ -0,0 +1,354 @@
+From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:43:39 +0800
+Subject: [PATCH 013/188] LoongArch: Implement the new vector cost model
+ framework.
+
+This patch makes LoongArch use the new vector hooks and implements the
+costing function determine_suggested_unroll_factor, so that it can
+suggest an unroll factor for a given loop being vectorized, based on
+the vec_ops analysis done during vector costing and the available issue
+information.  The approach follows the aarch64 and rs6000 ports.
+
+The patch also reduces the cost of unaligned stores, making it equal to
+the cost of aligned ones in order to avoid odd alignment peeling.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
+	vector_costs.  Add a constructor.
+	(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
+	adjust the cost for inner loops.
+	(loongarch_vector_costs::count_operations): New function.
+	(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
+	(loongarch_vector_costs::finish_cost): Ditto.
+	(loongarch_builtin_vectorization_cost): Adjust.
+	* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* config/loongarch/genopts/loongarch.opt.in
+	(loongarch-vect-unroll-limit): Ditto.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* doc/invoke.texi (loongarch-vect-unroll-limit): Document new option.
+---
+ gcc/config/loongarch/genopts/loongarch.opt.in |  15 +-
+ gcc/config/loongarch/loongarch.cc             | 173 ++++++++++++++++--
+ gcc/config/loongarch/loongarch.opt            |  15 +-
+ gcc/doc/invoke.texi                           |   7 +
+ 4 files changed, 188 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index f18733c24..74cf4a7f7 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -152,10 +152,6 @@ mbranch-cost=
+ Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+ -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
+
+-mmemvec-cost=
+-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
+--mmemvec-cost=COST	Set the cost of vector memory access instructions.
+-
+ mcheck-zero-division
+ Target Mask(CHECK_ZERO_DIV)
+ Trap on integer divide by zero.
+@@ -219,3 +215,14 @@ mrelax
+ Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+ Take advantage of linker relaxations to reduce the number of instructions
+ required to materialize symbol addresses.
++
++-param=loongarch-vect-unroll-limit=
++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
++Used to limit unroll factor which indicates how much the autovectorizer may
++unroll a loop.  The default value is 6.
++
++-param=loongarch-vect-issue-info=
++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
++Indicate how many non memory access vector instructions can be issued per
++cycle, it's used in unroll factor determination for autovectorizer.  The
++default value is 4.
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c0f58f9a9..e22a64600 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "rtl-iter.h"
+ #include "opts.h"
+ #include "function-abi.h"
++#include "cfgloop.h"
++#include "tree-vectorizer.h"
+
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+     }
+ }
+
+-/* Vectorizer cost model implementation.  */
+-
+ /* Implement targetm.vectorize.builtin_vectorization_cost.  */
+
+ static int
+@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+     case vector_load:
+     case vec_to_scalar:
+     case scalar_to_vec:
+-    case cond_branch_not_taken:
+-    case vec_promote_demote:
+     case scalar_store:
+     case vector_store:
+       return 1;
+
++    case vec_promote_demote:
+     case vec_perm:
+       return LASX_SUPPORTED_MODE_P (mode)
+	 && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
+
+     case unaligned_load:
+-    case vector_gather_load:
+-      return 2;
+-
+     case unaligned_store:
+-    case vector_scatter_store:
+-      return 10;
++      return 2;
+
+     case cond_branch_taken:
+-      return 3;
++      return 4;
++
++    case cond_branch_not_taken:
++      return 2;
+
+     case vec_construct:
+       elements = TYPE_VECTOR_SUBPARTS (vectype);
+-      return elements / 2 + 1;
++      if (ISA_HAS_LASX)
++	return elements + 1;
++      else
++	return elements;
+
+     default:
+       gcc_unreachable ();
+     }
+ }
+
++class loongarch_vector_costs : public vector_costs
++{
++public:
++  using vector_costs::vector_costs;
++
++  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
++			      stmt_vec_info stmt_info, slp_tree, tree vectype,
++			      int misalign,
++			      vect_cost_model_location where) override;
++  void finish_cost (const vector_costs *) override;
++
++protected:
++  void count_operations (vect_cost_for_stmt, stmt_vec_info,
++			 vect_cost_model_location, unsigned int);
++  unsigned int determine_suggested_unroll_factor (loop_vec_info);
++  /* The number of vectorized stmts in loop.  */
++  unsigned m_stmts = 0;
++  /* The number of load and store operations in loop.  */
++  unsigned m_loads = 0;
++  unsigned m_stores = 0;
++  /* Reduction factor for suggesting unroll factor.  */
++  unsigned m_reduc_factor = 0;
++  /* True if the loop contains an average operation.  */
++  bool m_has_avg = false;
++};
++
++/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
++static vector_costs *
++loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
++{
++  return new loongarch_vector_costs (vinfo, costing_for_scalar);
++}
++
++void
++loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
++					  stmt_vec_info stmt_info,
++					  vect_cost_model_location where,
++					  unsigned int count)
++{
++  if (!m_costing_for_scalar
++      && is_a<loop_vec_info> (m_vinfo)
++      && where == vect_body)
++    {
++      m_stmts += count;
++
++      if (kind == scalar_load
++	  || kind == vector_load
++	  || kind == unaligned_load)
++	m_loads += count;
++      else if (kind == scalar_store
++	       || kind == vector_store
++	       || kind == unaligned_store)
++	m_stores += count;
++      else if ((kind == scalar_stmt
++		|| kind == vector_stmt
++		|| kind == vec_to_scalar)
++	       && stmt_info && vect_is_reduction (stmt_info))
++	{
++	  tree lhs = gimple_get_lhs (stmt_info->stmt);
++	  unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
++	  m_reduc_factor = MAX (base * count, m_reduc_factor);
++	}
++    }
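determine_suggested_unroll_factor (truncated in this view) works from the counts gathered by count_operations: roughly, the fewer issue slots per cycle the loop body's non-memory vector operations occupy, the higher the suggested unroll, capped by loongarch-vect-unroll-limit. A deliberately simplified model of that arithmetic — ours, not the patch's exact code:

    /* Sketch: suggest unrolling until about issue_info non-memory vector
       ops per cycle are in flight, never exceeding the unroll limit.  */
    static unsigned
    suggested_unroll_factor (unsigned general_ops, unsigned issue_info,
                             unsigned unroll_limit)
    {
      if (general_ops == 0)
        return 1;
      unsigned uf = issue_info / general_ops;
      if (uf < 1)
        uf = 1;
      return uf < unroll_limit ? uf : unroll_limit;
    }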
View file
_service:tar_scm:0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
Added
@@ -0,0 +1,34 @@
+From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 23 Oct 2023 09:07:32 +0800
+Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE.
+
+LoongArch's microarchitecture maintains cache coherency in hardware.
+Because of out-of-order execution, an "ibar" is still required so that
+stores executed by this CPU before the "ibar" (which may have
+invalidated icache contents) become visible to instruction fetch.
+"ibar" does not itself invalidate the icache, so the start and end
+parameters do not affect "ibar" performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 572b538be..f7ddfc452 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function
+   (TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0)
+
+ #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
++
++/* LoongArch maintains ICache/DCache coherency by hardware,
++   we just need "ibar" to avoid instruction hazard here.  */
++#undef CLEAR_INSN_CACHE
++#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
+-- 
+2.43.0
+
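With CLEAR_INSN_CACHE defined, libgcc's __clear_cache — and therefore the __builtin___clear_cache builtin — reduce to a single ibar on LoongArch. A typical caller looks like this (illustrative sketch; the buffer contents stand in for freshly written machine code):

    #include <string.h>

    void
    publish_code (void *buf, const void *insns, unsigned len)
    {
      memcpy (buf, insns, len);
      /* Becomes "ibar 0" with the definition above; the begin/end
         arguments are ignored by the expansion.  */
      __builtin___clear_cache ((char *) buf, (char *) buf + len);
    }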
View file
_service:tar_scm:0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
Added
@@ -0,0 +1,233 @@
+From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 23 Oct 2023 15:23:11 +0800
+Subject: [PATCH 015/188] LoongArch: Add enum-style -mexplicit-relocs= option
+
+To strike a better balance between scheduling and relaxation when -flto
+is enabled, add three-way -mexplicit-relocs={auto,none,always} options.
+The old -mexplicit-relocs and -mno-explicit-relocs options are still
+supported; they are mapped to -mexplicit-relocs=always and
+-mexplicit-relocs=none.
+
+The default choice is determined by probing assembler capabilities at
+build time.  If the assembler does not support explicit relocs at all,
+the default will be none; if it supports explicit relocs but not
+relaxation, the default will be always; if both explicit relocs and
+relaxation are supported, the default will be auto.
+
+Currently auto is the same as none.  We will make auto smarter in the
+following changes.
+
+gcc/ChangeLog:
+
+	* config/loongarch/genopts/loongarch-strings: Add strings for
+	-mexplicit-relocs={auto,none,always}.
+	* config/loongarch/genopts/loongarch.opt.in: Add options for
+	-mexplicit-relocs={auto,none,always}.
+	* config/loongarch/loongarch-str.h: Regenerate.
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch-def.h
+	(EXPLICIT_RELOCS_AUTO): Define.
+	(EXPLICIT_RELOCS_NONE): Define.
+	(EXPLICIT_RELOCS_ALWAYS): Define.
+	(N_EXPLICIT_RELOCS_TYPES): Define.
+	* config/loongarch/loongarch.cc
+	(loongarch_option_override_internal): Error out if the old-style
+	-mno-explicit-relocs option is used with
+	-mexplicit-relocs={auto,none,always} together.  Map
+	-mno-explicit-relocs to -mexplicit-relocs=none and
+	-mexplicit-relocs to -mexplicit-relocs=always for backward
+	compatibility.  Set a proper default for -mexplicit-relocs=
+	based on configure-time probed linker capability.  Update a
+	diagnostic message to mention -mexplicit-relocs=always instead
+	of the old-style -mexplicit-relocs.
+	(loongarch_handle_model_attribute): Update a diagnostic message
+	to mention -mexplicit-relocs=always instead of the old-style
+	-mexplicit-relocs.
+	* config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define.
+--- + .../loongarch/genopts/loongarch-strings | 6 +++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++-- + gcc/config/loongarch/loongarch-def.h | 6 +++++ + gcc/config/loongarch/loongarch-str.h | 5 ++++ + gcc/config/loongarch/loongarch.cc | 24 +++++++++++++++++-- + gcc/config/loongarch/loongarch.h | 3 +++ + gcc/config/loongarch/loongarch.opt | 21 ++++++++++++++-- + 7 files changed, 80 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index eb5086fe3..6c8a42af2 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -65,3 +65,9 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme ++ ++# -mexplicit-relocs ++OPTSTR_EXPLICIT_RELOCS explicit-relocs ++STR_EXPLICIT_RELOCS_AUTO auto ++STR_EXPLICIT_RELOCS_NONE none ++STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 74cf4a7f7..e7df1964a 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -176,10 +176,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index eb8e53b20..4757de14b 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings; + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 + ++/* enum explicit_relocs */ ++#define EXPLICIT_RELOCS_AUTO 0 ++#define EXPLICIT_RELOCS_NONE 1 ++#define EXPLICIT_RELOCS_ALWAYS 2 ++#define N_EXPLICIT_RELOCS_TYPES 3 ++ + /* The common default value for variables whose assignments + are triggered by command-line options. */ + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index ecfebf9db..037e9e583 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + ++#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" ++#define STR_EXPLICIT_RELOCS_AUTO "auto" ++#define STR_EXPLICIT_RELOCS_NONE "none" ++#define STR_EXPLICIT_RELOCS_ALWAYS "always" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e22a64600..3258c8655 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + ++ if (la_opt_explicit_relocs != M_OPT_UNSET ++ && la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ error ("do not use %qs (with %qs) and %qs (without %qs) together", ++ "-mexplicit-relocs=", "=", ++ la_opt_explicit_relocs_backward ? "-mexplicit-relocs" ++ : "-mno-explicit-relocs", "="); ++ ++ if (la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ la_opt_explicit_relocs = (la_opt_explicit_relocs_backward ++ ? EXPLICIT_RELOCS_ALWAYS ++ : EXPLICIT_RELOCS_NONE); ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (HAVE_AS_MRELAX_OPTION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + case CMODEL_EXTREME: + if (!TARGET_EXPLICIT_RELOCS) + error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs"); ++ "extreme", "-mexplicit-relocs=always"); + + if (opts->x_flag_plt) + { +@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + if (!TARGET_EXPLICIT_RELOCS) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, "-mexplicit-relocs"); ++ "%qE attribute requires %s", name, ++ "-mexplicit-relocs=always"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index f7ddfc452..6e8ac293a 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function + we just need "ibar" to avoid instruction hazard here. */ + #undef CLEAR_INSN_CACHE + #define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0) ++ ++#define TARGET_EXPLICIT_RELOCS \ ++ (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 34bd832bd..44376fd77 100644 +--- a/gcc/config/loongarch/loongarch.opt
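For readers not fluent in LoongArch relocation syntax: the option controls whether GCC emits assembler pseudo-ops (which the assembler may expand or relax) or the underlying machine instructions with explicit relocation operators (which GCC itself can schedule). A hedged sketch of the difference for a GOT load — the exact output depends on code model and PIC-ness:

    extern int x;

    int
    load_x (void)
    {
      return x;
      /* -mexplicit-relocs=none (pseudo-op, assembler-expanded):
             la.global $t0, x
             ld.w      $a0, $t0, 0
         -mexplicit-relocs=always (machine insns, schedulable):
             pcalau12i $t0, %got_pc_hi20(x)
             ld.d      $t0, $t0, %got_pc_lo12(x)
             ld.w      $a0, $t0, 0
         auto chooses per case, as the following patches refine.  */
    }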
View file
_service:tar_scm:0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
Added
@@ -0,0 +1,212 @@ +From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 30 Sep 2023 18:46:28 +0800 +Subject: PATCH 016/188 LoongArch: Use explicit relocs for GOT access when + -mexplicit-relocs=auto and LTO during a final link with linker plugin + +If we are performing LTO for a final link and linker plugin is enabled, +then we are sure any GOT access may resolve to a symbol out of the link +unit (otherwise the linker plugin will tell us the symbol should be +resolved locally and we'll use PC-relative access instead). + +Produce machine instructions with explicit relocs instead of la.global +for better scheduling. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_explicit_relocs_p): Declare new function. + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Implement. + (loongarch_symbol_insns): Call loongarch_explicit_relocs_p for + SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS. + (loongarch_split_symbol): Call loongarch_explicit_relocs_p for + deciding if return early, instead of using + TARGET_EXPLICIT_RELOCS. + (loongarch_output_move): CAll loongarch_explicit_relocs_p + instead of using TARGET_EXPLICIT_RELOCS. + * config/loongarch/loongarch.md (*low<mode>): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + (@ld_from_got<mode>): Likewise. + * config/loongarch/predicates.md (move_operand): Call + loongarch_explicit_relocs_p instead of using + TARGET_EXPLICIT_RELOCS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-lto.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 34 +++++++++++++++---- + gcc/config/loongarch/loongarch.md | 4 +-- + gcc/config/loongarch/predicates.md | 8 ++--- + .../loongarch/explicit-relocs-auto-lto.c | 26 ++++++++++++++ + 5 files changed, 59 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 163162598..51d38177b 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3258c8655..1d20577e7 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + gcc_unreachable (); + } + ++/* If -mexplicit-relocs=auto, we use machine operations with reloc hints ++ for cases where the linker is unable to relax so we can schedule the ++ machine operations, otherwise use an assembler pseudo-op so the ++ assembler will generate R_LARCH_RELAX. 
*/ ++ ++bool ++loongarch_explicit_relocs_p (enum loongarch_symbol_type type) ++{ ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) ++ return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; ++ ++ /* If we are performing LTO for a final link, and we have the linker ++ plugin so we know the resolution of the symbols, then all GOT ++ references are binding to external symbols or preemptable symbols. ++ So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin) ++ && type == SYMBOL_GOT_DISP); ++} ++ + /* Returns the number of instructions necessary to reference a symbol. */ + + static int +@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + case SYMBOL_GOT_DISP: + /* The constant will have to be loaded from the GOT before it + is used in an address. */ +- if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE) ++ if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE) + return 0; + + return 3; +@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + If so, and if LOW_OUT is nonnull, emit the high part and store the + low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. + +- Return false if build with '-mno-explicit-relocs'. ++ Return false if build with '-mexplicit-relocs=none'. + + TEMP is as for loongarch_force_temporary and is used to load the high + part into a register. +@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + { + enum loongarch_symbol_type symbol_type; + +- /* If build with '-mno-explicit-relocs', don't split symbol. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE) + || !loongarch_symbolic_constant_p (addr, &symbol_type) ++ || !loongarch_explicit_relocs_p (symbol_type) + || loongarch_symbol_insns (symbol_type, mode) == 0 + || !loongarch_split_symbol_type (symbol_type)) + return false; +@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src) + } + } + +- if (!TARGET_EXPLICIT_RELOCS ++ if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src)) + && dest_code == REG && symbolic_operand (src, VOIDmode)) + { + if (loongarch_classify_symbol (src) == SYMBOL_PCREL) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 29ac950bf..81c97393b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2247,7 +2247,7 @@ + (set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" " r") + (match_operand:P 2 "symbolic_operand" ""))) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +@@ -2275,7 +2275,7 @@ + (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand"))) + UNSPEC_LOAD_FROM_GOT)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "ld.<d>\t%0,%1,%L2" + (set_attr "type" "move") + ) +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index ad6cee5c4..6b50b3a4d 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -541,16 +541,14 @@ + case SYMBOL_REF: + case LABEL_REF: + return (loongarch_symbolic_constant_p (op, &symbol_type) +- && (!TARGET_EXPLICIT_RELOCS ++ && (!loongarch_explicit_relocs_p (symbol_type) + || !loongarch_split_symbol_type 
(symbol_type))); + + case HIGH: +- /* '-mno-explicit-relocs' don't generate high/low pairs. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + op = XEXP (op, 0); ++ + return (loongarch_symbolic_constant_p (op, &symbol_type) ++ && loongarch_explicit_relocs_p (symbol_type) + && loongarch_split_symbol_type (symbol_type)); + + default: +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +new file mode 100644 +index 000000000..f53b54689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +@@ -0,0 +1,26 @@ ++/* { dg-do link } */ ++/* { dg-require-effective-target lto } */ ++/* { dg-require-linker-plugin "" } */ ++/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */ ++ ++int pcrel __attribute__ ((visibility ("hidden"))); ++int got __attribute__ ((visibility ("default"))); ++ ++int ++*addr_pcrel (void) ++{ ++ return &pcrel; ++} ++ ++int ++*addr_got (void)
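The test (truncated above) contrasts a hidden-visibility symbol, which binds locally and can be addressed pc-relatively, with a default-visibility one, which may be preempted and must therefore go through the GOT even under LTO with the linker plugin. In outline — a sketch mirroring the test, not the file itself:

    int pcrel __attribute__ ((visibility ("hidden")));  /* %pc_hi20 pair */
    int got __attribute__ ((visibility ("default")));   /* %got_pc_hi20 pair */

    int *
    addr_pcrel (void)
    {
      return &pcrel;
    }

    int *
    addr_got (void)
    {
      return &got;
    }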
View file
_service:tar_scm:0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
Added
@@ -0,0 +1,146 @@ +From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 2 Oct 2023 13:00:18 +0800 +Subject: PATCH 017/188 LoongArch: Use explicit relocs for TLS access with + -mexplicit-relocs=auto + +The linker does not know how to relax TLS access for LoongArch, so let's +emit machine instructions with explicit relocs for TLS. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for TLS symbol types if -mexplicit-relocs=auto. + (loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS + with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@tls_low<mode>): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New + test. + * gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New + test. +--- + gcc/config/loongarch/loongarch.cc | 37 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 2 +- + .../explicit-relocs-auto-tls-ld-gd.c | 9 +++++ + .../explicit-relocs-auto-tls-le-ie.c | 6 +++ + 4 files changed, 40 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1d20577e7..fa5c14be6 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + +- /* If we are performing LTO for a final link, and we have the linker +- plugin so we know the resolution of the symbols, then all GOT +- references are binding to external symbols or preemptable symbols. +- So the linker cannot relax them. */ +- return (in_lto_p +- && !flag_incremental_link +- && HAVE_LTO_PLUGIN == 2 +- && (!global_options_set.x_flag_use_linker_plugin +- || global_options.x_flag_use_linker_plugin) +- && type == SYMBOL_GOT_DISP); ++ switch (type) ++ { ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLS_LE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ /* The linker don't know how to relax TLS accesses. */ ++ return true; ++ case SYMBOL_GOT_DISP: ++ /* If we are performing LTO for a final link, and we have the ++ linker plugin so we know the resolution of the symbols, then ++ all GOT references are binding to external symbols or ++ preemptable symbols. So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin)); ++ default: ++ return false; ++ } + } + + /* Returns the number of instructions necessary to reference a symbol. */ +@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc) + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); +@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 81c97393b..3b836d535 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2257,7 +2257,7 @@ + (unspec:P (mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand" ""))) + UNSPEC_TLS_LOW)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..957ff98df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +new file mode 100644 +index 000000000..78898cfc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto" } */ ++ ++#include "explicit-relocs-auto-tls-ld-gd.c" ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +-- +2.43.0 +
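The four TLS symbol types handled above correspond to the standard TLS access models; which one a variable gets depends on PIC-ness and whether it binds locally, as the two new tests exercise. A compile-only sketch:

    /* Without -fPIC: local-exec for the local symbol, initial-exec for
       the external one (SYMBOL_TLS_LE / SYMBOL_TLS_IE).  With -fPIC the
       same code uses local-dynamic / general-dynamic (SYMBOL_TLSLDM /
       SYMBOL_TLSGD) — all now emitted with explicit relocs.  */
    __thread int t_local;
    extern __thread int t_extern;

    int
    tls_sum (void)
    {
      return t_local + t_extern;
    }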
View file
_service:tar_scm:0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
Added
@@ -0,0 +1,245 @@
+From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 1 Oct 2023 11:14:29 +0800
+Subject: [PATCH 018/188] LoongArch: Use explicit relocs for addresses only
+ used for one load or store with -mexplicit-relocs=auto and
+ -mcmodel={normal,medium}
+
+In these cases, if we use explicit relocs, we end up with 2
+instructions:
+
+    pcalau12i t0, %pc_hi20(x)
+    ld.d      t0, t0, %pc_lo12(x)
+
+If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB
+range) we still have 2 instructions:
+
+    pcaddi t0, %pcrel_20(x)
+    ld.d   t0, t0, 0
+
+If x is out of the range we'll have 3 instructions.  So for these cases
+just emit machine instructions with explicit relocs.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (symbolic_pcrel_operand): New
+	predicate.
+	* config/loongarch/loongarch.md (define_peephole2): Optimize
+	la.local + ld/st to pcalau12i + ld/st if the address is only used
+	once if -mexplicit-relocs=auto and -mcmodel=normal or medium.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store.c:
+	New test.
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c:
+	New test.
+---
+ gcc/config/loongarch/loongarch.md             | 122 ++++++++++++++++++
+ gcc/config/loongarch/predicates.md            |   7 +
+ ...-relocs-auto-single-load-store-no-anchor.c |   6 +
+ .../explicit-relocs-auto-single-load-store.c  |  14 ++
+ 4 files changed, 149 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 3b836d535..c4c6baa60 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -65,6 +65,7 @@
+
+   UNSPEC_LOAD_FROM_GOT
+   UNSPEC_PCALAU12I
++  UNSPEC_PCALAU12I_GR
+   UNSPEC_ORI_L_LO12
+   UNSPEC_LUI_L_HI20
+   UNSPEC_LUI_H_LO20
+@@ -2297,6 +2298,16 @@
+   "pcalau12i\t%0,%%pc_hi20(%1)"
+   [(set_attr "type" "move")])
+
++;; @pcalau12i may be used for sibcall so it has a strict constraint.  This
++;; allows any general register as the operand.
++(define_insn "@pcalau12i_gr<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++	(unspec:P [(match_operand:P 1 "symbolic_operand" "")]
++		  UNSPEC_PCALAU12I_GR))]
++  ""
++  "pcalau12i\t%0,%%pc_hi20(%1)"
++  [(set_attr "type" "move")])
++
+ (define_insn "@ori_l_lo12<mode>"
+   [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec:P [(match_operand:P 1 "register_operand" "r")
+@@ -3748,6 +3759,117 @@
+   [(set_attr "type" "unknown")
+    (set_attr "mode" "<MODE>")])
+
++;; With normal or medium code models, if the only use of a pc-relative
++;; address is for loading or storing a value, then relying on linker
++;; relaxation is not better than emitting the machine instruction directly.
++;; Even if the la.local pseudo op can be relaxed, we get:
++;;
++;;     pcaddi $t0, %pcrel_20(x)
++;;     ld.d   $t0, $t0, 0
++;;
++;; There are still two instructions, same as using the machine instructions
++;; and explicit relocs:
++;;
++;;     pcalau12i $t0, %pc_hi20(x)
++;;     ld.d      $t0, $t0, %pc_lo12(x)
++;;
++;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
++;; 3 instructions).
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(mem:GPR (match_dup 0)))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(mem:GPR (plus (match_dup 0)
++		       (match_operand 3 "const_int_operand"))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(any_extend:GPR (mem:SUBDI (match_dup 0))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2)
++	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
++					     (match_dup 1)))))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(any_extend:GPR
++	  (mem:SUBDI (plus (match_dup 0)
++			   (match_operand 3 "const_int_operand")))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2)
++	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
++					     (match_dup 1)))))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (mem:QHWD (match_dup 0))
++	(match_operand:QHWD 2 "register_operand"))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0])) \
++   && REGNO (operands[0]) != REGNO (operands[2])"
++  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (mem:QHWD (plus (match_dup 0)
++			(match_operand 3 "const_int_operand")))
++	(match_operand:QHWD 2 "register_operand"))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0])) \
++   && REGNO (operands[0]) != REGNO (operands[2])"
++  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
+ ;; Synchronization instructions.
+
+ (include "sync.md")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 6b50b3a4d..1d669f560 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -563,6 +563,13 @@
+   return loongarch_symbolic_constant_p (op, &type);
+ })
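The net effect of the peepholes, shown on a single global load (illustrative; register allocation may differ):

    extern int counter;

    int
    read_counter (void)
    {
      return counter;
      /* before:  la.local  $t0, counter           (pseudo-op)
                  ld.w      $a0, $t0, 0
         after:   pcalau12i $t0, %pc_hi20(counter)
                  ld.w      $a0, $t0, %pc_lo12(counter)
         Two instructions either way in the best case, one fewer than the
         unrelaxed pseudo-op case, and now visible to the scheduler.  */
    }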
View file
_service:tar_scm:0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
Added
@@ -0,0 +1,84 @@
+From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Tue, 24 Oct 2023 14:40:14 +0800
+Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for
+ TLS.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (get_thread_pointer<mode>): Add the
+	instruction template corresponding to the __builtin_thread_pointer
+	function.
+	* doc/extend.texi: Add the __builtin_thread_pointer function support
+	description to the documentation.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/builtin_thread_pointer.c: New test.
+---
+ gcc/config/loongarch/loongarch.md                  |  7 +++++++
+ gcc/doc/extend.texi                                |  5 +++++
+ .../gcc.target/loongarch/builtin_thread_pointer.c  | 10 ++++++++++
+ 3 files changed, 22 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index c4c6baa60..80487488d 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -113,6 +113,7 @@
+
+ (define_constants
+   [(RETURN_ADDR_REGNUM		1)
++   (TP_REGNUM			2)
+    (T0_REGNUM			12)
+    (T1_REGNUM			13)
+    (S0_REGNUM			23)
+@@ -3647,6 +3648,12 @@
+    (set_attr "length" "0")
+    (set_attr "type" "ghost")])
+
++;; Named pattern for expanding thread pointer reference.
++(define_expand "get_thread_pointer<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++	(reg:P TP_REGNUM))]
++  "HAVE_AS_TLS"
++  {})
+
+ (define_split
+   [(match_operand 0 "small_data_pattern")]
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 1d1bac255..497c6de5f 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}.
+ void __break (imm0_32767)
+ @end smallexample
+
++Returns the value that is currently set in the @samp{tp} register.
++@smallexample
++ void * __builtin_thread_pointer (void)
++@end smallexample
++
+ @node MIPS DSP Built-in Functions
+ @subsection MIPS DSP Built-in Functions
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+new file mode 100644
+index 000000000..541e3b143
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target tls_native } */
++/* { dg-options "-O2" } */
++/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */
++
++void *
++get_tp ()
++{
++  return __builtin_thread_pointer ();
++}
+-- 
+2.43.0
+
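Beyond the included test, a hedged example of how the builtin might be used — the offset is a hypothetical layout constant for illustration, not anything defined by the patch:

    #define PER_THREAD_OFFSET 0x100  /* hypothetical TP-relative offset */

    void *
    per_thread_area (void)
    {
      char *tp = (char *) __builtin_thread_pointer ();
      return tp + PER_THREAD_OFFSET;  /* address of a per-thread block */
    }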
View file
_service:tar_scm:0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
Added
@@ -0,0 +1,189 @@ +From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Mon, 23 Oct 2023 10:13:24 +0800 +Subject: PATCH 020/188 LoongArch: Fix vfrint-releated comments in + lsxintrin.h and lasxintrin.h + +The comment of vfrint-related intrinsic functions does not match the return +value type in definition. This patch fixes these comments. + +gcc/ChangeLog: + + * config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments. + (__lasx_xvfrintrne_s): Ditto. + (__lasx_xvfrintrne_d): Ditto. + (__lasx_xvfrintrz_s): Ditto. + (__lasx_xvfrintrz_d): Ditto. + (__lasx_xvfrintrp_s): Ditto. + (__lasx_xvfrintrp_d): Ditto. + (__lasx_xvfrintrm_s): Ditto. + (__lasx_xvfrintrm_d): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto. + (__lsx_vfrintrne_s): Ditto. + (__lsx_vfrintrne_d): Ditto. + (__lsx_vfrintrz_s): Ditto. + (__lsx_vfrintrz_d): Ditto. + (__lsx_vfrintrp_s): Ditto. + (__lsx_vfrintrp_d): Ditto. + (__lsx_vfrintrm_s): Ditto. + (__lsx_vfrintrm_d): Ditto. +--- + gcc/config/loongarch/lasxintrin.h | 16 ++++++++-------- + gcc/config/loongarch/lsxintrin.h | 16 ++++++++-------- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index d39379927..7bce2c757 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrne_s (__m256 _1) + { +@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrne_d (__m256d _1) + { +@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrz_s (__m256 _1) + { +@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrz_d (__m256d _1) + { +@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrp_s (__m256 _1) + { +@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrp_d (__m256d _1) + { +@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrm_s (__m256 _1) + { +@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrm_d (__m256d _1) + { +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index ec4206990..29553c093 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrne_s (__m128 _1) + { +@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrne_d (__m128d _1) + { +@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrz_s (__m128 _1) + { +@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrz_d (__m128d _1) + { +@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrp_s (__m128 _1) + { +@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrp_d (__m128d _1) + { +@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrm_s (__m128 _1) + { +@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrm_d (__m128d _1) + { +-- +2.43.0 +
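The distinction the fixed comments encode: vfrint* rounds to an integral value but stays in the floating-point domain, while the vftint* family converts to integer vectors — hence V4SF, V4SF rather than V4SI, V4SF. A small usage sketch (compile with -mlsx; assumes the usual LSX intrinsic set is available):

    #include <lsxintrin.h>

    __m128
    round_nearest_even (__m128 v)
    {
      return __lsx_vfrintrne_s (v);    /* still a float vector */
    }

    __m128i
    convert_nearest_even (__m128 v)
    {
      return __lsx_vftintrne_w_s (v);  /* now an integer vector */
    }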
View file
_service:tar_scm:0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
Added
@@ -0,0 +1,418 @@ +From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 26 Oct 2023 09:34:32 +0800 +Subject: PATCH 021/188 LoongArch:Enable vcond_mask_mn expanders for SF/DF + modes. + +If the vcond_mask patterns don't support fp modes, the vector +FP comparison instructions will not be generated. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vcond_mask_<ILASX:mode><ILASX:mode>): Change to + (vcond_mask_<mode><mode256_i>): this. + * config/loongarch/lsx.md (vcond_mask_<ILSX:mode><ILSX:mode>): Change to + (vcond_mask_<mode><mode_i>): this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test. +--- + gcc/config/loongarch/lasx.md | 14 +-- + gcc/config/loongarch/lsx.md | 14 +-- + .../loongarch/vector/lasx/lasx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 87 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-2.c | 87 +++++++++++++++++++ + 6 files changed, 316 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 442fda246..f0f2dd08d 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -906,15 +906,15 @@ + }) + + ;; Same as vcond_ +-(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>" +- (match_operand:ILASX 0 "register_operand") +- (match_operand:ILASX 1 "reg_or_m1_operand") +- (match_operand:ILASX 2 "reg_or_0_operand") +- (match_operand:ILASX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode256_i>" ++ (match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "reg_or_m1_operand") ++ (match_operand:LASX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE256> 3 "register_operand") + "ISA_HAS_LASX" + { +- loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode, +- <ILASX:VIMODE256>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE256>mode, operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index b4e92ae9c..4af32c8df 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -644,15 +644,15 @@ + DONE; + }) + +-(define_expand "vcond_mask_<ILSX:mode><ILSX:mode>" +- (match_operand:ILSX 0 "register_operand") +- (match_operand:ILSX 1 "reg_or_m1_operand") +- (match_operand:ILSX 2 "reg_or_0_operand") +- (match_operand:ILSX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode_i>" ++ (match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "reg_or_m1_operand") ++ (match_operand:LSX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE> 3 "register_operand") + "ISA_HAS_LSX" + { +- loongarch_expand_vec_cond_mask_expr (<ILSX:MODE>mode, +- <ILSX:VIMODE>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE>mode, operands); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+new file mode 100644
+index 000000000..ee9cb1a1f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+@@ -0,0 +1,64 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */
++
++#include <stdint-gcc.h>
++
++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
++  void __attribute__ ((noinline, noclone)) \
++  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
++                                   DATA_TYPE *__restrict__ x, \
++                                   DATA_TYPE *__restrict__ y, \
++                                   CMP_TYPE *__restrict__ a, \
++                                   CMP_TYPE *__restrict__ b, \
++                                   int n) \
++  { \
++    for (int i = 0; i < n; i++) \
++      { \
++        DATA_TYPE xval = x[i], yval = y[i]; \
++        CMP_TYPE aval = a[i], bval = b[i]; \
++        r[i] = aval COND bval ? xval : yval; \
++      } \
++  }
++
++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
++  T (int8_t, int8_t, COND, SUFFIX) \
++  T (int16_t, int16_t, COND, SUFFIX) \
++  T (int32_t, int32_t, COND, SUFFIX) \
++  T (int64_t, int64_t, COND, SUFFIX) \
++  T (float, int32_t, COND, SUFFIX##_float) \
++  T (double, int64_t, COND, SUFFIX##_double)
++
++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
++  T (uint8_t, uint8_t, COND, SUFFIX) \
++  T (uint16_t, uint16_t, COND, SUFFIX) \
++  T (uint32_t, uint32_t, COND, SUFFIX) \
++  T (uint64_t, uint64_t, COND, SUFFIX) \
++  T (float, uint32_t, COND, SUFFIX##_float) \
++  T (double, uint64_t, COND, SUFFIX##_double)
++
++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
++  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \
++  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
++
++#define TEST_VAR_ALL(T) \
++  TEST_COND_VAR_ALL (T, >, _gt) \
++  TEST_COND_VAR_ALL (T, <, _lt) \
++  TEST_COND_VAR_ALL (T, >=, _ge) \
++  TEST_COND_VAR_ALL (T, <=, _le) \
++  TEST_COND_VAR_ALL (T, ==, _eq) \
++  TEST_COND_VAR_ALL (T, !=, _ne)
++
++TEST_VAR_ALL (DEF_VCOND_VAR)
++
++/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+new file mode 100644
+index 000000000..5f40ed44c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+@@ -0,0 +1,87 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlasx" } */
++
++#include <stdint-gcc.h>
++
++#define eq(A, B) ((A) == (B))
++#define ne(A, B) ((A) != (B))
++#define olt(A, B) ((A) < (B))
++#define ole(A, B) ((A) <= (B))
++#define oge(A, B) ((A) >= (B))
++#define ogt(A, B) ((A) > (B))
++#define ordered(A, B) (!__builtin_isunordered (A, B))
++#define unordered(A, B) (__builtin_isunordered (A, B))
++#define ueq(A, B) (!__builtin_islessgreater (A, B))
++#define ult(A, B) (__builtin_isless (A, B))
++#define ule(A, B) (__builtin_islessequal (A, B))
++#define uge(A, B)
(__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 *restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \
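A minimal sketch (not part of the patch) of the kind of loop these expanders unlock: once vcond_mask covers the SF/DF modes, a float compare-and-select loop like the one below can be vectorized into vfcmp/xvfcmp plus a bit-select under -mlsx/-mlasx.

/* Sketch only; mirrors the shape of the new lasx-vcond-1.c tests.  */
void
select_max (float *__restrict r, const float *__restrict a,
            const float *__restrict b, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = a[i] >= b[i] ? a[i] : b[i];
}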
View file
_service:tar_scm:0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
Added
@@ -0,0 +1,34 @@
+From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 19:39:27 +0800
+Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined
+ [PR112299]
+
+Now that loongarch.md uses HAVE_AS_TLS, we need this to fix the failure of
+building a cross compiler if the cross assembler is not installed yet.
+
+gcc/ChangeLog:
+
+    PR target/112299
+    * config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
+    if not defined yet.
+---
+ gcc/config/loongarch/loongarch-opts.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index f2b59abe6..c4975af00 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+ #define HAVE_AS_MRELAX_OPTION 0
+ #endif
+
++#ifndef HAVE_AS_TLS
++#define HAVE_AS_TLS 0
++#endif
++
+ #endif /* LOONGARCH_OPTS_H */
+--
+2.43.0
+
View file
_service:tar_scm:0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
Added
@@ -0,0 +1,30 @@
+From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 3 Nov 2023 17:01:36 +0800
+Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in
+ lsx_vreplgr2vr_<lsxfmt_f> template
+
+gcc/ChangeLog:
+
+    * config/loongarch/lsx.md: Fix instruction name typo in
+    lsx_vreplgr2vr_<lsxfmt_f> template.
+---
+ gcc/config/loongarch/lsx.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 4af32c8df..55c7d79a0 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -1523,7 +1523,7 @@
+   "ISA_HAS_LSX"
+ {
+   if (which_alternative == 1)
+-    return "ldi.<lsxfmt>\t%w0,0";
++    return "vldi.<lsxfmt>\t%w0,0";
+
+   if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+     return "#";
+--
+2.43.0
+
View file
_service:tar_scm:0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
Added
@@ -0,0 +1,116 @@
+From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 12 Nov 2023 00:55:13 +0800
+Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of
+ gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476]
+
+The GCC internals documentation says:
+
+    'subreg's of 'subreg's are not supported.  Using
+    'simplify_gen_subreg' is the recommended way to avoid this problem.
+
+Unfortunately loongarch_expand_vec_cond_mask_expr might create a nested
+subreg under certain circumstances, causing an ICE.
+
+Use simplify_gen_subreg as the internals documentation suggests.
+
+gcc/ChangeLog:
+
+    PR target/112476
+    * config/loongarch/loongarch.cc
+    (loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
+    instead of gen_rtx_SUBREG.
+
+gcc/testsuite/ChangeLog:
+
+    PR target/112476
+    * gcc.target/loongarch/pr112476-1.c: New test.
+    * gcc.target/loongarch/pr112476-2.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc      | 11 ++++++---
+ .../gcc.target/loongarch/pr112476-1.c  | 24 +++++++++++++++++++
+ .../gcc.target/loongarch/pr112476-2.c  |  5 ++++
+ 3 files changed, 37 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index fa5c14be6..65ca1489f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+       if (mode != vimode)
+         {
+           xop1 = gen_reg_rtx (vimode);
+-          emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
++          emit_move_insn (xop1,
++                          simplify_gen_subreg (vimode, operands[1],
++                                               mode, 0));
+         }
+       emit_move_insn (src1, xop1);
+     }
+@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+       if (mode != vimode)
+         {
+           xop2 = gen_reg_rtx (vimode);
+-          emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
++          emit_move_insn (xop2,
++                          simplify_gen_subreg (vimode, operands[2],
++                                               mode, 0));
+         }
+       emit_move_insn (src2, xop2);
+     }
+@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+                            gen_rtx_AND (vimode, mask, src1));
+       /* The result is placed back to a register with the mask.  */
+       emit_insn (gen_rtx_SET (mask, bsel));
+-      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
++      emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
++                                                        vimode, 0));
+     }
+ }
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+new file mode 100644
+index 000000000..4cf133e7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+@@ -0,0 +1,24 @@
++/* PR target/112476: ICE with -mlsx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */
++
++int foo, bar;
++float baz, res, a;
++
++void
++apply_adjacent_ternary (float *dst, float *src0)
++{
++  do
++    {
++      __builtin_memcpy (&res, &src0, sizeof (res));
++      *dst = foo ? baz : res;
++      dst++;
++    }
++  while (dst != src0);
++}
++
++void
++xx (void)
++{
++  apply_adjacent_ternary (&a, &a);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+new file mode 100644
+index 000000000..cc0dfbfc9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+@@ -0,0 +1,5 @@
++/* PR target/112476: ICE with -mlasx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */
++
++#include "pr112476-1.c"
+--
+2.43.0
+
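For context, a sketch of the rule the patch applies (GCC internals; the helper name narrow_to_mode is hypothetical): gen_rtx_SUBREG wraps its operand unconditionally, so applying it to something that is already a (subreg ...) yields the unsupported (subreg (subreg ...)). simplify_gen_subreg folds the nesting instead, or returns NULL_RTX when no valid subreg exists.

/* Hypothetical helper; assumes GCC's internal RTL API.  */
static rtx
narrow_to_mode (machine_mode outer, rtx op, machine_mode inner)
{
  /* Folds subregs of subregs into a single subreg where possible,
     unlike gen_rtx_SUBREG, which would nest them.  */
  return simplify_gen_subreg (outer, op, inner, 0);
}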
View file
_service:tar_scm:0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
Added
@@ -0,0 +1,116 @@
+From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 20:24:58 +0800
+Subject: [PATCH 025/188] LoongArch: Optimize single-used address with
+ -mexplicit-relocs=auto for fld/fst
+
+fld and fst have the same addressing mode as ld.w and st.w, so the same
+optimization as r14-4851 should be applied to them too.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
+    iterator.
+    (ST_ANY): New mode iterator.
+    (define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
+    ST_ANY instead of QHWD for applicable patterns.
+---
+ gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 14 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 80487488d..ed86c95bd 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -400,6 +400,14 @@
+    (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+    (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
+
++;; A mode for anything with 32 bits or more, and able to be loaded with
++;; the same addressing mode as ld.w.
++(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
++
++;; A mode for anything able to be stored with the same addressing mode as
++;; st.w.
++(define_mode_iterator ST_ANY [QHWD ANYF])
++
+ ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
+ ;; 32-bit version and "mul.d" in the 64-bit version.
+ (define_mode_attr d [(SI "w") (DI "d")])
+@@ -3785,13 +3793,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-        (mem:GPR (match_dup 0)))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++        (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++        (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3799,14 +3808,15 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-        (mem:GPR (plus (match_dup 0)
+-                       (match_operand 3 "const_int_operand"))))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++        (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
++                                      (match_operand 3 "const_int_operand"))))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++        (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+@@ -3850,13 +3860,13 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (match_dup 0))
+-        (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (match_dup 0))
++        (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3864,14 +3874,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (plus (match_dup 0)
+-                        (match_operand 3 "const_int_operand")))
+-        (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (plus (match_dup 0)
++                          (match_operand 3 "const_int_operand")))
++        (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+--
+2.43.0
+
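A hypothetical illustration (not from the patch) of a single-used pcrel address that the widened peephole now also covers for FP accesses; with -mexplicit-relocs=auto and -mcmodel=normal the expected output is roughly a pcalau12i followed by an fld.s with a %pc_lo12 offset, with no separate address materialization.

static float val;

float
load_val (void)
{
  /* Expected code, roughly:
       pcalau12i  $r4, %pc_hi20(val)
       fld.s      $f0, $r4, %pc_lo12(val)  */
  return val;
}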
View file
_service:tar_scm:0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
Added
@@ -0,0 +1,305 @@
+From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 3 Nov 2023 21:19:59 +0800
+Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler
+ doesn't support conditional branch relaxation [PR112330]
+
+As the commit message of r14-4674 has indicated, if the assembler does
+not support conditional branch relaxation, a relocation overflow may
+happen on conditional branches when relaxation is enabled because the
+number of NOP instructions inserted by the assembler will be more than
+the number estimated by GCC.
+
+To work around this issue, disable relaxation by default if the
+assembler is detected as incapable of performing conditional branch
+relaxation at GCC build time.  We also need to pass -mno-relax to the
+assembler to really disable relaxation.  But, if the assembler does not
+support the -mrelax option at all, we should not pass -mno-relax to the
+assembler or it will immediately error out.  Also handle this with the
+build-time assembler capability probing, and add a pair of options
+-m[no-]pass-mrelax-to-as to allow using a different assembler from the
+build-time one.
+
+With this change, if GCC is built with GAS 2.41, relaxation will be
+disabled by default.  So the default value of -mexplicit-relocs= is also
+changed to 'always' if -mno-relax is specified or implied by the
+build-time default, because using assembler macros for symbol addresses
+produces no benefit when relaxation is disabled.
+
+gcc/ChangeLog:
+
+    PR target/112330
+    * config/loongarch/genopts/loongarch.opt.in: Add
+    -m[no-]pass-relax-to-as.  Change the default of -m[no-]relax to
+    account for conditional branch relaxation support status.
+    * config/loongarch/loongarch.opt: Regenerate.
+    * configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
+    the assembler supports conditional branch relaxation.
+    * configure: Regenerate.
+    * config.in: Regenerate.  Note that there are some unrelated
+    changes introduced by r14-5424 (which does not contain a
+    config.in regeneration).
+    * config/loongarch/loongarch-opts.h
+    (HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
+    * config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
+    Define.
+    (ASM_MRELAX_SPEC): Define.
+    (ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
+    * config/loongarch/loongarch.cc: Take the setting of
+    -mno-relax into account when determining the default of
+    -mexplicit-relocs=.
+    * doc/invoke.texi: Document -m[no-]relax and
+    -m[no-]pass-mrelax-to-as for LoongArch.  Update the default
+    value of -mexplicit-relocs=.
+---
+ gcc/config.in                                 | 35 ++++++++++++++++++-
+ gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
+ gcc/config/loongarch/loongarch-driver.h       | 16 ++++++++-
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++
+ gcc/config/loongarch/loongarch.cc             |  2 +-
+ gcc/config/loongarch/loongarch.opt            |  6 +++-
+ gcc/configure                                 | 35 +++++++++++++++++++
+ gcc/configure.ac                              | 10 ++++++
+ 8 files changed, 109 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 0c55e67e7..04968b53c 100644
+--- a/gcc/config.in
++++ b/gcc/config.in
+@@ -374,6 +374,12 @@
+ #endif
+
+
++/* Define if your assembler supports conditional branch relaxation. */
++#ifndef USED_FOR_TARGET
++#undef HAVE_AS_COND_BRANCH_RELAXATION
++#endif
++
++
+ /* Define if your assembler supports the --debug-prefix-map option.
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_DEBUG_PREFIX_MAP +@@ -798,6 +804,20 @@ + #endif + + ++/* Define to 1 if you have the Mac OS X function ++ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES ++#endif ++ ++ ++/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in ++ the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFPREFERENCESCOPYAPPVALUE ++#endif ++ ++ + /* Define to 1 if you have the `clearerr_unlocked' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_CLEARERR_UNLOCKED +@@ -822,6 +842,13 @@ + #endif + + ++/* Define if the GNU dcgettext() function is already present or preinstalled. ++ */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_DCGETTEXT ++#endif ++ ++ + /* Define to 1 if we found a declaration for 'abort', otherwise define to 0. + */ + #ifndef USED_FOR_TARGET +@@ -1554,6 +1581,12 @@ + #endif + + ++/* Define if the GNU gettext() function is already present or preinstalled. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_GETTEXT ++#endif ++ ++ + /* Define to 1 if you have the `gettimeofday' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_GETTIMEOFDAY +@@ -1585,7 +1618,7 @@ + #endif + + +-/* Define if you have the iconv() function. */ ++/* Define if you have the iconv() function and it works. */ + #ifndef USED_FOR_TARGET + #undef HAVE_ICONV + #endif +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index e7df1964a..bd3cfaf60 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index 59fa3263d..c8dba2cc4 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. 
If not see + "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \ + "%(subtarget_cc1_spec)" + ++#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}" ++#else ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}" ++#endif ++ ++#if HAVE_AS_MRELAX_OPTION ++#define ASM_MRELAX_SPEC \ ++ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#else ++#define ASM_MRELAX_SPEC \ ++ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#endif ++ + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" ++ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index c4975af00..dfbe9dd5c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_MRELAX_OPTION 0 + #endif + ++#ifndef HAVE_AS_COND_BRANCH_RELAXATION ++#define HAVE_AS_COND_BRANCH_RELAXATION 0 ++#endif ++
View file
_service:tar_scm:0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
Added
@@ -0,0 +1,391 @@
+From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 6 Nov 2023 16:06:08 +0800
+Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions
+ before LL-SC loops
+
+This is isomorphic to the LLVM changes [1]-[2].
+
+On LoongArch, the LL and SC instructions have memory barrier semantics:
+
+- LL: <memory-barrier> + <load-exclusive>
+- SC: <store-conditional> + <memory-barrier>
+
+But the compare and swap operation is allowed to fail, and if it fails
+the SC instruction is not executed, thus the guarantee of acquire
+semantics cannot be ensured.  Therefore, an acquire barrier needs to be
+generated when failure_memorder includes an acquire operation.
+
+On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
+acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
+barrier.  So it's always enough for acquire semantics.  OTOH if an
+acquire semantic is not needed, we still need the "dbar 0x700" as the
+load-load barrier like all LL-SC loops.
+
+[1]: https://github.com/llvm/llvm-project/pull/67391
+[2]: https://github.com/llvm/llvm-project/pull/69339
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc
+    (loongarch_memmodel_needs_release_fence): Remove.
+    (loongarch_cas_failure_memorder_needs_acquire): New static
+    function.
+    (loongarch_print_operand): Redefine 'G' for the barrier on CAS
+    failure.
+    * config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+    Remove the redundant barrier before the LL instruction, and
+    emit an acquire barrier on failure if needed by
+    failure_memorder.
+    (atomic_cas_value_cmp_and_7_<mode>): Likewise.
+    (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
+    before the LL instruction.
+    (atomic_cas_value_sub_7_<mode>): Likewise.
+    (atomic_cas_value_and_7_<mode>): Likewise.
+    (atomic_cas_value_xor_7_<mode>): Likewise.
+    (atomic_cas_value_or_7_<mode>): Likewise.
+    (atomic_cas_value_nand_7_<mode>): Likewise.
+    (atomic_cas_value_exchange_7_<mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/cas-acquire.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 30 ++++---
+ gcc/config/loongarch/sync.md                  | 49 +++++------
+ .../gcc.target/loongarch/cas-acquire.c        | 82 +++++++++++++++++++
+ 3 files changed, 119 insertions(+), 42 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 6d580ee75..8467f03cf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
+     }
+ }
+
+-/* Return true if a FENCE should be emitted to before a memory access to
+-   implement the release portion of memory model MODEL.  */
++/* Return true if a FENCE should be emitted after a failed CAS to
++   implement the acquire semantic of failure_memorder.
*/ + + static bool +-loongarch_memmodel_needs_release_fence (enum memmodel model) ++loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { ++ case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_RELAXED: ++ case MEMMODEL_RELEASE: + return false; + ++ /* MEMMODEL_CONSUME is deliberately not handled because it's always ++ replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by ++ MEMMODEL_CONSUME, read the change (re)introducing it carefully and ++ decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ + default: + gcc_unreachable (); + } +@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'd' Print CONST_INT OP in decimal. + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'F' Print the FPU branch condition for comparison OP. +- 'G' Print a DBAR insn if the memory model requires a release. ++ 'G' Print a DBAR insn for CAS failure (with an acquire semantic if ++ needed, otherwise a simple load-load barrier). + 'H' Print address 52-61bit relocation associated with OP. + 'h' Print the high-part relocation associated with OP. + 'i' Print i if the operand is not a register. +@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'G': +- if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ if (loongarch_cas_failure_memorder_needs_acquire ( ++ memmodel_from_int (INTVAL (op)))) ++ fputs ("dbar\t0b10100", file); ++ else ++ fputs ("dbar\t0x700", file); + break; + + case 'h': +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index efa40f24c..dd1f98946 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -162,19 +162,18 @@ + (clobber (match_scratch:GPR 6 "=&r")) + "" + { +- return "%G5\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "bne\\t%0,%z2,2f\\n\\t" + "or%i3\\t%6,$zero,%3\\n\\t" + "sc.<amo>\\t%6,%1\\n\\t" +- "beq\\t$zero,%6,1b\\n\\t" ++ "beqz\\t%6,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G5\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -267,8 +266,7 @@ + (clobber (match_scratch:GPR 7 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%2\\n\\t" + "bne\\t%7,%z4,2f\\n\\t" +@@ -278,10 +276,10 @@ + "beq\\t$zero,%7,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G6\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 40))) ++ (set (attr "length") (const_int 36))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -336,8 +334,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "add.w\\t%8,%0,%z5\\n\\t" +@@ -347,7 +344,7 @@ + "beq\\t$zero,%7,1b"; + } + +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_sub_7_<mode>" + (set 
(match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -363,8 +360,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + {
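A usage sketch (not one of the new tests): a compare-and-swap whose failure ordering includes acquire. After this patch the barrier moves to the failure path, emitted as "dbar 0b10100", an acquire barrier on LoongArch v1.10 and a full barrier on v1.00, instead of a barrier before the LL.

#include <stdatomic.h>

int
try_lock (atomic_int *lock)
{
  int expected = 0;
  return atomic_compare_exchange_strong_explicit (
      lock, &expected, 1, memory_order_acquire, memory_order_acquire);
}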
View file
_service:tar_scm:0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
Added
@@ -0,0 +1,161 @@
+From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Thu, 16 Nov 2023 20:31:09 +0800
+Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test
+ case.
+
+These tests failed when they were first added; this patch adjusts the
+scan-assembler-times counts to fix them.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust assembler times.
+    * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
+    * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
+    * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.
+---
+ .../loongarch/vector/lasx/lasx-vcond-1.c      | 12 +++----
+ .../loongarch/vector/lasx/lasx-vcond-2.c      | 36 +++++++++----------
+ .../loongarch/vector/lsx/lsx-vcond-1.c        | 12 +++----
+ .../loongarch/vector/lsx/lsx-vcond-2.c        | 36 +++++++++----------
+ 4 files changed, 48 insertions(+), 48 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+index ee9cb1a1f..57064eac9 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
+
+ /* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+index 5f40ed44c..55d5a084c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+@@ -67,21 +67,21 @@ TEST_CMP (nule)
+ TEST_CMP (nuge)
+ TEST_CMP (nugt)
+
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
+-/* { dg-final {
scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +index 138adccfa..8c69f0d9b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR) + + /* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index e8fe31f8f..2214afd0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -67,21 +67,21 @@ TEST_CMP (nule) + TEST_CMP (nuge) + TEST_CMP (nugt) + +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } 
*/ +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */ +-- +2.43.0 +
View file
_service:tar_scm:0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
Added
@@ -0,0 +1,45 @@ +From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 16 Nov 2023 16:44:36 +0800 +Subject: PATCH 029/188 LoongArch: Increase cost of vector aligned + store/load. + +Based on SPEC2017 performance evaluation results, it's better to make them equal +to the cost of unaligned store/load so as to avoid odd alignment peeling. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_builtin_vectorization_cost): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8467f03cf..b6f0d61ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + case scalar_stmt: + case scalar_load: + case vector_stmt: +- case vector_load: + case vec_to_scalar: + case scalar_to_vec: + case scalar_store: +- case vector_store: + return 1; + + case vec_promote_demote: +@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + return LASX_SUPPORTED_MODE_P (mode) + && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1; + ++ case vector_load: ++ case vector_store: + case unaligned_load: + case unaligned_store: + return 2; +-- +2.43.0 +
View file
_service:tar_scm:0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
Added
@@ -0,0 +1,58 @@
+From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Fri, 17 Nov 2023 10:38:02 +0800
+Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
+
+LoongArch has defined ctz and clz in the backend, but if we want GCC to
+do the CTZ transformation optimization in the forwprop2 pass, GCC needs
+to know the value of clz/ctz at zero, which may be beneficial for some
+test cases (like spec2017 deepsjeng_r).
+
+After implementing the macro, we tested the dynamic instruction count on
+deepsjeng_r:
+- before 1688423249186
+- after  1660311215745 (1.66% reduction)
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO):
+    Implement.
+    (CTZ_DEFINED_VALUE_AT_ZERO): Same.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.dg/pr90838.c: Add clz/ctz test support on LoongArch.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ gcc/testsuite/gcc.dg/pr90838.c   | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 6e8ac293a..19cf6fd33 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function
+
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
++
++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
+index 7502b8463..7aa912525 100644
+--- a/gcc/testsuite/gcc.dg/pr90838.c
++++ b/gcc/testsuite/gcc.dg/pr90838.c
+@@ -82,3 +82,8 @@ int ctz4 (unsigned long x)
+ /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
++
++/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */
+--
+2.43.0
+
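The transformation this macro unlocks targets table-based ctz idioms like the classic de Bruijn sequence below (a sketch in the spirit of gcc.dg/pr90838.c, not the exact test). With C[LT]Z_DEFINED_VALUE_AT_ZERO in place, forwprop2 may rewrite the whole expression into a single .CTZ, which then becomes a ctz.w instruction.

static const int debruijn32[32] = {
  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};

int
ctz32 (unsigned x)
{
  /* Isolate the lowest set bit, multiply by a de Bruijn constant, and
     use the top five bits as a table index.  */
  return debruijn32[((x & -x) * 0x077CB531u) >> 27];
}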
View file
_service:tar_scm:0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
Added
@@ -0,0 +1,197 @@
+From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 14 Nov 2023 00:17:19 +0800
+Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1)
+ expansion efficiently
+
+With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be
+vectorized using [x]vbitseti.{w/d} instructions to directly set the
+sign bits.
+
+Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion
+efficiently" (r14-5289).
+
+gcc/ChangeLog:
+
+    * config/loongarch/lsx.md (copysign<mode>3): Allow operand 2 to
+    be a reg_or_vector_same_val_operand.  If it's a const vector
+    with same negative elements, expand the copysign with a bitset
+    instruction.  Otherwise, force it into a register.
+    * config/loongarch/lasx.md (copysign<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * g++.target/loongarch/vect-copysign-negconst.C: New test.
+    * g++.target/loongarch/vect-copysign-negconst-run.C: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 22 ++++++++-
+ gcc/config/loongarch/lsx.md                   | 22 ++++++++-
+ .../loongarch/vect-copysign-negconst-run.C    | 47 +++++++++++++++++++
+ .../loongarch/vect-copysign-negconst.C        | 27 +++++++++++
+ 4 files changed, 116 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index f0f2dd08d..2e11f0612 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3136,11 +3136,31 @@
+                (match_operand:FLASX 1 "register_operand")))
+    (set (match_dup 5)
+         (and:FLASX (match_dup 3)
+-                   (match_operand:FLASX 2 "register_operand")))
++                   (match_operand:FLASX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLASX 0 "register_operand")
+         (ior:FLASX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LASX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++          && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++        {
++          rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++          operands[0] = lowpart_subreg (<VIMODE256>mode, operands[0],
++                                        <MODE>mode);
++          operands[1] = lowpart_subreg (<VIMODE256>mode, operands[1],
++                                        <MODE>mode);
++          emit_insn (gen_lasx_xvbitseti_<lasxfmt> (operands[0],
++                                                   operands[1], n));
++          DONE;
++        }
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 55c7d79a0..8ea41c85b 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -2873,11 +2873,31 @@
+                (match_operand:FLSX 1 "register_operand")))
+    (set (match_dup 5)
+         (and:FLSX (match_dup 3)
+-                  (match_operand:FLSX 2 "register_operand")))
++                  (match_operand:FLSX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLSX 0 "register_operand")
+         (ior:FLSX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LSX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++          && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++        {
++          rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++          operands[0] = lowpart_subreg (<VIMODE>mode, operands[0],
++                                        <MODE>mode);
++          operands[1] = lowpart_subreg (<VIMODE>mode, operands[1],
++                                        <MODE>mode);
++          emit_insn (gen_lsx_vbitseti_<lsxfmt> (operands[0], operands[1],
++                                                n));
++          DONE;
++        }
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+new file mode 100644
+index 000000000..d2d5d15c9
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+@@ -0,0 +1,47 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-require-effective-target loongarch_asx_hw } */
++
++#include "vect-copysign-negconst.C"
++
++double d[] = {1.2, -3.4, -5.6, 7.8};
++float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810};
++
++double _abs(double x) { return __builtin_fabs (x); }
++float _abs(float x) { return __builtin_fabsf (x); }
++
++template <class T>
++void
++check (T *arr, T *orig, int len)
++{
++  for (int i = 0; i < len; i++)
++    {
++      if (arr[i] > 0)
++        __builtin_trap ();
++      if (_abs (arr[i]) != _abs (orig[i]))
++        __builtin_trap ();
++    }
++}
++
++int
++main()
++{
++  double test_d[4];
++  float test_f[8];
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<2> (test_d);
++  check (test_d, d, 2);
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<4> (test_d);
++  check (test_d, d, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<4> (test_f);
++  check (test_f, f, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<8> (test_f);
++  check (test_f, f, 8);
++}
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+new file mode 100644
+index 000000000..5e8820d2b
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-final { scan-assembler "\txvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\txvbitseti.*31" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*31" } } */
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (float *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysignf (arr[i], -2);
++}
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (double *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysign (arr[i], -3);
++}
++
++template void force_negative<4>(float *);
++template void force_negative<8>(float *);
++template void force_negative<2>(double *);
++template void force_negative<4>(double *);
+--
+2.43.0
+
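A minimal sketch (not one of the new tests): every element only needs its sign bit forced on, so under -mlasx a loop like this can now become a single xvbitseti.w on bit 31 instead of a three-instruction mask-and-merge sequence.

void
all_negative (float *__restrict a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = __builtin_copysignf (a[i], -1.0f);
}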
View file
_service:tar_scm:0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
Added
@@ -0,0 +1,561 @@ +From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 16 Nov 2023 15:06:11 +0800 +Subject: PATCH 032/188 LoongArch: Add code generation support for call36 + function calls. + +When compiling with '-mcmodel=medium', the function call is made through +'pcaddu18i+jirl' if binutils supports call36, otherwise the +native implementation 'pcalau12i+jirl' is used. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro. + * config/loongarch/loongarch.cc (loongarch_legitimize_call_address): + If binutils supports call36, the function call is not split over expand. + * config/loongarch/loongarch.md: Add call36 generation code. + * config/loongarch/predicates.md: Likewise. + * configure: Regenerate. + * configure.ac: Check whether binutils supports call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-medium-5.c: If the assembler supports call36, + the test is abandoned. + * gcc.target/loongarch/func-call-medium-6.c: Likewise. + * gcc.target/loongarch/func-call-medium-7.c: Likewise. + * gcc.target/loongarch/func-call-medium-8.c: Likewise. + * lib/target-supports.exp: Added a function to see if the assembler supports + the call36 relocation. + * gcc.target/loongarch/func-call-medium-call36-1.c: New test. + * gcc.target/loongarch/func-call-medium-call36.c: New test. + +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.in | 6 + + gcc/config/loongarch/loongarch-opts.h | 4 + + gcc/config/loongarch/loongarch.cc | 12 +- + gcc/config/loongarch/loongarch.md | 171 +++++++++++++++--- + gcc/config/loongarch/predicates.md | 7 +- + gcc/configure | 32 ++++ + gcc/configure.ac | 6 + + .../gcc.target/loongarch/func-call-medium-5.c | 1 + + .../gcc.target/loongarch/func-call-medium-6.c | 1 + + .../gcc.target/loongarch/func-call-medium-7.c | 1 + + .../gcc.target/loongarch/func-call-medium-8.c | 1 + + .../loongarch/func-call-medium-call36-1.c | 21 +++ + .../loongarch/func-call-medium-call36.c | 32 ++++ + gcc/testsuite/lib/target-supports.exp | 9 + + 14 files changed, 268 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c + +diff --git a/gcc/config.in b/gcc/config.in +index 04968b53c..033cfb98b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -759,6 +759,12 @@ + #endif + + ++/* Define if your assembler supports call36 relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_SUPPORT_CALL36 ++#endif ++ ++ + /* Define if your assembler and linker support thread-local storage. 
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_TLS +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index dfbe9dd5c..22ce1a122 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_SUPPORT_CALL36 ++#define HAVE_AS_SUPPORT_CALL36 0 ++#endif ++ + #ifndef HAVE_AS_MRELAX_OPTION + #define HAVE_AS_MRELAX_OPTION 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b6f0d61ef..43f0e82ba 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr) + + enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr); + +- /* Split function call insn 'bl sym' or 'bl %plt(sym)' to : +- pcalau12i $rd, %pc_hi20(sym) +- jr $rd, %pc_lo12(sym). */ ++ /* If add the compilation option '-cmodel=medium', and the assembler does ++ not support call36. The following sequence of instructions will be ++ used for the function call: ++ pcalau12i $rd, %pc_hi20(sym) ++ jr $rd, %pc_lo12(sym) ++ */ + + if (TARGET_CMODEL_MEDIUM +- && TARGET_EXPLICIT_RELOCS ++ && !HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) + && (symbol_type == SYMBOL_PCREL + || (symbol_type == SYMBOL_GOT_DISP && flag_plt))) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed86c95bd..52e40a208 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3274,7 +3274,13 @@ + XEXP (target, 1), + operands1)); + else +- emit_call_insn (gen_sibcall_internal (target, operands1)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_internal (target, operands1)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + DONE; + }) + +@@ -3282,10 +3288,25 @@ + (call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b")) + (match_operand 1 "" "")) + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%0 +- b\t%0 +- b\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@sibcall_internal_1<mode>" +@@ -3318,9 +3339,17 @@ + operands2, + arg2)); + else +- emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, +- operands2, +- arg2)); ++ { ++ rtx call ++ = emit_call_insn (gen_sibcall_value_multiple_internal (arg1, ++ target, ++ operands2, ++ arg2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + } + else + { +@@ -3334,8 +3363,15 @@ + XEXP (target, 1), + operands2)); + else +- emit_call_insn (gen_sibcall_value_internal (operands0, target, +- operands2)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_value_internal (operands0, ++ target, ++ operands2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG 
(Pmode, T0_REGNUM)); ++ } + } + DONE; + }) +@@ -3345,10 +3381,25 @@
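As a hypothetical illustration (not from the patch), an ordinary external call compiled with -mcmodel=medium and a call36-capable assembler is expected to emit roughly the two-instruction sequence below, reaching about +/-128 GiB.

extern void callee (void);

void
caller (void)
{
  /* Expected code, roughly:
       pcaddu18i  $r12, %call36(callee)
       jirl       $r1, $r12, 0  */
  callee ();
}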
View file
_service:tar_scm:0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
Added
@@ -0,0 +1,362 @@ +From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 17 Nov 2023 15:42:53 +0800 +Subject: PATCH 033/188 LoongArch: Implement atomic operations using + LoongArch1.1 instructions. + +1. short and char type calls for atomic_add_fetch and __atomic_fetch_add are + implemented using amadd{_db}.{b/h}. +2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange. +3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are + implemented using amswap{_db}.{b/h}. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Add comments. + * config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro. + * config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence): + Remove redundant code implementations. + * config/loongarch/sync.md (d): Added QI, HI support. + (atomic_add<mode>): New template. + (atomic_exchange<mode>_short): Likewise. + (atomic_cas_value_strong<mode>_amcas): Likewise.. + (atomic_fetch_add<mode>_short): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 2 + + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch.cc | 6 +- + gcc/config/loongarch/sync.md | 186 ++++++++++++++++++++------ + 4 files changed, 147 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 4757de14b..078d8607d 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -54,7 +54,9 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings; ++/* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 ++/* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 22ce1a122..9b3d023ac 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + || la_target.isa.simd == ISA_EXT_SIMD_LASX) + #define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -mno-memcpy imposes a global constraint. 
*/ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 43f0e82ba..7bb46a45d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + static bool + loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + return true; + + case MEMMODEL_RELAXED: +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index dd1f98946..1eabaec04 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -38,7 +38,7 @@ + (plus "add") (ior "or") (xor "xor") (and "and")) + + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo (SI "w") (DI "d")) ++(define_mode_attr amo (QI "b") (HI "h") (SI "w") (DI "d")) + + ;; <amop> expands to the name of the atomic operand that implements a + ;; particular code. +@@ -123,7 +123,18 @@ + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_add<mode>" ++ (set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ (plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand") ;; model ++ UNSPEC_SYNC_OLD_OP)) ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A2.<amo>\t$zero,%z1,%0" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_fetch_<atomic_optab><mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -131,12 +142,12 @@ + (set (match_dup 1) + (unspec_volatile:GPR + (any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_exchange<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -148,7 +159,19 @@ + (match_operand:GPR 2 "register_operand" "r")) + "" + "amswap%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_exchange<mode>_short" ++ (set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ (match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand") ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r")) ++ "ISA_BASE_IS_LA64V110" ++ "amswap%A3.<amo>\t%0,%z2,%1" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_cas_value_strong<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -156,25 +179,36 @@ + (set (match_dup 1) + (unspec_volatile:GPR (match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 "const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand") ;; mod_f ++ (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r")) ++ (clobber (match_scratch:GPR 5 "=&r")) + "" + { + return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" 
+ "bne\\t%0,%z2,2f\\n\\t" +- "or%i3\\t%6,$zero,%3\\n\\t" +- "sc.<amo>\\t%6,%1\\n\\t" +- "beqz\\t%6,1b\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.<amo>\\t%5,%1\\n\\t" ++ "beqz\\t%5,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "%G5\\n\\t" ++ "%G4\\n\\t" + "3:\\n\\t"; + } + (set (attr "length") (const_int 28))) + ++(define_insn "atomic_cas_value_strong<mode>_amcas" ++ (set (match_operand:QHWD 0 "register_operand" "=&r") ++ (match_operand:QHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:QHWD (match_operand:QHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:QHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP)) ++ "ISA_BASE_IS_LA64V110" ++ "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" ++ (set (attr "length") (const_int 8))) ++ + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -186,9 +220,29 @@ + (match_operand:SI 7 "const_int_operand" "") ;; mod_f + "" + {
_service:tar_scm:0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
Added
@@ -0,0 +1,140 @@
+From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 17 Nov 2023 16:04:45 +0800
+Subject: [PATCH 034/188] LoongArch: atomic_load and atomic_store are
+ implemented using dbar grading.
+
+Because the LA464 memory model allows loads from the same address to
+complete out of order, in the test example below the load at line 23
+may be executed before the load at line 21, resulting in an error.
+So when memmodel is MEMMODEL_RELAXED, the load instruction is followed
+by "dbar 0x700" when implementing __atomic_load.
+
+  1 void *
+  2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
+  3 {
+  4   int *intptr;
+  5   uintptr_t oldval = 1;
+  6
+  7   __atomic_compare_exchange_n (ptrlock, &oldval, 2, false,
+  8                                MEMMODEL_RELAXED, MEMMODEL_RELAXED);
+  9
+ 10   /* futex works on ints, not pointers.
+ 11      But a valid work share pointer will be at least
+ 12      8 byte aligned, so it is safe to assume the low
+ 13      32-bits of the pointer won't contain values 1 or 2.  */
+ 14   __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
+ 15 #if __BYTE_ORDER == __BIG_ENDIAN
+ 16   if (sizeof (*ptrlock) > sizeof (int))
+ 17     intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
+ 18 #endif
+ 19   do
+ 20     do_wait (intptr, 2);
+ 21   while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2);
+ 22   __asm volatile ("" : : : "memory");
+ 23   return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+ 24 }
+
+gcc/ChangeLog:
+
+	* config/loongarch/sync.md (atomic_load<mode>): New template.
+---
+ gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++---
+ 1 file changed, 65 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 1eabaec04..f4673c856 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -30,6 +30,7 @@
+   UNSPEC_SYNC_OLD_OP
+   UNSPEC_SYNC_EXCHANGE
+   UNSPEC_ATOMIC_STORE
++  UNSPEC_ATOMIC_LOAD
+   UNSPEC_MEMORY_BARRIER
+ )
+ 
+@@ -103,16 +104,75 @@
+ 
+ ;; Atomic memory operations.
+ 
++(define_insn "atomic_load<mode>"
++  [(set (match_operand:QHWD 0 "register_operand" "=r")
++	(unspec_volatile:QHWD
++	  [(match_operand:QHWD 1 "memory_operand" "+m")
++	   (match_operand:SI 2 "const_int_operand")]  ;; model
++	  UNSPEC_ATOMIC_LOAD))]
++  ""
++{
++  enum memmodel model = memmodel_base (INTVAL (operands[2]));
++
++  switch (model)
++    {
++    case MEMMODEL_SEQ_CST:
++      return "dbar\t0x11\\n\\t"
++	     "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x14\\n\\t";
++    case MEMMODEL_ACQUIRE:
++      return "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x14\\n\\t";
++    case MEMMODEL_RELAXED:
++      return "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x700\\n\\t";
++
++    default:
++      /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
++	 __ATOMIC_CONSUME and __ATOMIC_ACQUIRE.
++	 The expand_builtin_atomic_store function converts all invalid memmodels
++	 to MEMMODEL_SEQ_CST.
++
++	 __atomic builtins doc: "Consume is implemented using the
++	 stronger acquire memory order because of a deficiency in C++11's
++	 semantics."  See PR 59448 and get_memmodel in builtins.cc.  */
++      gcc_unreachable ();
++    }
++}
++  [(set (attr "length") (const_int 12))])
++
+ ;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
+ (define_insn "atomic_store<mode>" +- (set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- (match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ (set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ (match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_ATOMIC_STORE)) + "" +- "amswap%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands2)); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t" ++ "dbar\t0x18\\n\\t"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "st.<size>\t%z1,%0"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ and __ATOMIC_RELEASE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. */ ++ gcc_unreachable (); ++ } ++} ++ (set (attr "length") (const_int 12))) + + (define_insn "atomic_<atomic_optab><mode>" + (set (match_operand:GPR 0 "memory_operand" "+ZB") +-- +2.43.0 +
_service:tar_scm:0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
Added
@@ -0,0 +1,615 @@ +From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:08:03 +0000 +Subject: PATCH 035/188 LoongArch: genopts: Add infrastructure to generate + code for new features in ISA evolution + +LoongArch v1.10 introduced the concept of ISA evolution. During ISA +evolution, many independent features can be added and enumerated via +CPUCFG. + +Add a data file into genopts storing the CPUCFG word, bit, the name +of the command line option controlling if this feature should be used +for compilation, and the text description. Make genstr.sh process these +info and add the command line options into loongarch.opt and +loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for +mapping CPUCFG output to the corresponding option. When handling +-march=native, use the information in loongarch-cpucfg-map.h to generate +the corresponding option mask. Enable the features implied by -march +setting unless the user has explicitly disabled the feature. + +The added options (-mdiv32 and -mld-seq-sa) are not really handled yet. +They'll be used in the following patches. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: New data file. + * config/loongarch/genopts/genstr.sh: Translate info in + isa-evolution.in when generating loongarch-str.h, loongarch.opt, + and loongarch-cpucfg-map.h. + * config/loongarch/genopts/loongarch.opt.in (isa_evolution): + New variable. + * config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New + rule. + (loongarch-str.h): Depend on isa-evolution.in. + (loongarch.opt): Depend on isa-evolution.in. + (loongarch-cpu.o): Depend on loongarch-cpucfg-map.h. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch-def.h (loongarch_isa): Add field + for evolution features. Add helper function to enable features + in this field. + Probe native CPU capability and save the corresponding options + into preset. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): + Probe native CPU capability and save the corresponding options + into preset. + (cache_cpucfg): Simplify with C++11-style for loop. + (cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ... + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Enable the ISA evolution + feature options implied by -march and not explicitly disabled. + (loongarch_asm_code_end): New function, print ISA information as + comments in the assembly if -fverbose-asm. It makes easier to + debug things like -march=native. + (TARGET_ASM_CODE_END): Define. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Generate. + (cpucfg_useful_idx, N_CPUCFG_WORDS) ... here. 
+--- + gcc/config/loongarch/genopts/genstr.sh | 92 ++++++++++++++++++- + gcc/config/loongarch/genopts/isa-evolution.in | 2 + + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++ + gcc/config/loongarch/loongarch-cpu.cc | 46 +++++----- + gcc/config/loongarch/loongarch-cpucfg-map.h | 48 ++++++++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-str.h | 6 +- + gcc/config/loongarch/loongarch.cc | 31 +++++++ + gcc/config/loongarch/loongarch.opt | 20 +++- + gcc/config/loongarch/t-loongarch | 21 ++++- + 10 files changed, 244 insertions(+), 36 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in + create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 972ef125f..bcc616e98 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -25,8 +25,8 @@ cd "$(dirname "$0")" + # Generate a header containing definitions from the string table. + gen_defines() { + cat <<EOF +-/* Generated automatically by "genstr" from "loongarch-strings". +- Please do not edit this file directly. ++/* Generated automatically by "genstr" from "loongarch-strings" and ++ "isa-evolution.in". Please do not edit this file directly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. + Contributed by Loongson Ltd. +@@ -56,6 +56,15 @@ EOF + loongarch-strings + + echo ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ a=$3 ++ gsub(/-/, "_", a) ++ print("#define OPTSTR_"toupper(a)"\t\""$3"\"") ++ }' isa-evolution.in ++ ++ echo + echo "#endif /* LOONGARCH_STR_H */" + } + +@@ -77,11 +86,12 @@ gen_options() { + # print a header + cat << EOF + ; Generated by "genstr" from the template "loongarch.opt.in" +-; and definitions from "loongarch-strings". ++; and definitions from "loongarch-strings" and "isa-evolution.in". + ; + ; Please do not edit this file directly. + ; It will be automatically updated during a gcc build +-; if you change "loongarch.opt.in" or "loongarch-strings". ++; if you change "loongarch.opt.in", "loongarch-strings", or ++; "isa-evolution.in". + ; + EOF + +@@ -91,13 +101,85 @@ EOF + eval "echo \"$line\"" + done + } ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ print("") ++ print("m"$3) ++ gsub(/-/, "_", $3) ++ print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ $1=""; $2=""; $3="" ++ sub(/^ */, "", $0) ++ print($0) ++ }' isa-evolution.in ++} ++ ++gen_cpucfg_map() { ++ cat <<EOF ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map = { ++EOF ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ gsub(/-/, "_", $3) ++ print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") ++ }' isa-evolution.in ++ ++ echo "};" ++ echo ++ echo "static constexpr int cpucfg_useful_idx = {" ++ ++ awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } ++ {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ ++ isa-evolution.in | sort -n | uniq ++ ++ echo "};" ++ echo "" ++ ++ awk 'BEGIN { max=19 } ++ { if ($1+0 > max+0) max=$1 } ++ END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ ++ isa-evolution.in ++ ++ echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" + } + + main() { + case "$1" in ++ cpucfg-map) gen_cpucfg_map;; + header) gen_defines;;
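To see the generator's output shape, here is a sketch of a loongarch-cpucfg-map.h entry produced for an isa-evolution.in record such as "2 26 div32 ..." (the entry matches the awk template above and the regenerated file shown in a later patch; treat the exact contents as illustrative):

/* Sketch of generated loongarch-cpucfg-map.h content.  */
static constexpr struct {
  int cpucfg_word;
  unsigned int cpucfg_bit;
  HOST_WIDE_INT isa_evolution_bit;
} cpucfg_map[] = {
  { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
};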
_service:tar_scm:0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
Added
@@ -0,0 +1,148 @@
+From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 18 Nov 2023 03:19:07 +0800
+Subject: [PATCH 036/188] LoongArch: Add evolution features of base ISA
+ revisions
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h
+	(loongarch_isa_base_features): Declare.  Define it in ...
+	* config/loongarch/loongarch-cpu.cc
+	(loongarch_isa_base_features): ... here.
+	(fill_native_cpu_config): If we know the base ISA of the CPU
+	model from PRID, use it instead of la64 (v1.0).  Check if all
+	expected features of this base ISA are available, emit a warning
+	if not.
+	* config/loongarch/loongarch-opts.cc (config_target_isa): Enable
+	the features implied by the base ISA if not -march=native.
+---
+ gcc/config/loongarch/loongarch-cpu.cc  | 62 ++++++++++++++++++--------
+ gcc/config/loongarch/loongarch-def.h   |  5 +++
+ gcc/config/loongarch/loongarch-opts.cc |  3 ++
+ 3 files changed, 52 insertions(+), 18 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
+index e1cd85d02..76d66fa55 100644
+--- a/gcc/config/loongarch/loongarch-cpu.cc
++++ b/gcc/config/loongarch/loongarch-cpu.cc
+@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3.  If not see
+ #include "loongarch-cpucfg-map.h"
+ #include "loongarch-str.h"
+ 
++/* loongarch_isa_base_features defined here instead of loongarch-def.c
++   because we need to use options.h.  Pay attention on the order of elements
++   in the initializer becaue ISO C++ does not allow C99 designated
++   initializers!  */
++
++#define ISA_BASE_LA64V110_FEATURES \
++  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
++
++int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
++  /* ISA_BASE_LA64V100 = */ 0,
++  /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES,
++};
++
+ /* Native CPU detection with "cpucfg" */
+ static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
+ 
+@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ 	 With: base architecture (ARCH)
+ 	 At:   cpucfg_words[1][11:0] */
+ 
+-      switch (cpucfg_cache[1] & 0x3)
+-	{
+-	  case 0x02:
+-	    tmp = ISA_BASE_LA64V100;
+-	    break;
+-
+-	  default:
+-	    fatal_error (UNKNOWN_LOCATION,
+-			 "unknown native base architecture %<0x%x%>, "
+-			 "%qs failed", (unsigned int) (cpucfg_cache[1] & 0x3),
+-			 "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
+-	}
+-
+-      /* Check consistency with PRID presets.  */
+-      if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
+-	warning (0, "base architecture %qs differs from PRID preset %qs",
+-		 loongarch_isa_base_strings[tmp],
+-		 loongarch_isa_base_strings[preset.base]);
++      if (native_cpu_type != CPU_NATIVE)
++	tmp = loongarch_cpu_default_isa[native_cpu_type].base;
++      else
++	switch (cpucfg_cache[1] & 0x3)
++	  {
++	  case 0x02:
++	    tmp = ISA_BASE_LA64V100;
++	    break;
++
++	  default:
++	    fatal_error (UNKNOWN_LOCATION,
++			 "unknown native base architecture %<0x%x%>, "
++			 "%qs failed",
++			 (unsigned int) (cpucfg_cache[1] & 0x3),
++			 "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
++	  }
+ 
+       /* Use the native value anyways.  */
+       preset.base = tmp;
+@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+       for (const auto &entry: cpucfg_map)
+ 	if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
+ 	  preset.evolution |= entry.isa_evolution_bit;
++
++      if (native_cpu_type != CPU_NATIVE)
++	{
++	  /* Check if the local CPU really supports the features of the base
++	     ISA of probed native_cpu_type.  If any feature is not detected,
++	     either GCC or the hardware is buggy.  */
++	  auto base_isa_feature = loongarch_isa_base_features[preset.base];
++	  if ((preset.evolution & base_isa_feature) != base_isa_feature)
++	    warning (0,
++		     "detected base architecture %qs, but some of its "
++		     "features are not detected; the detected base "
++		     "architecture may be unreliable, only detected "
++		     "features will be enabled",
++		     loongarch_isa_base_strings[preset.base]);
++	}
+     }
+ 
+   if (tune_native_p)
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index cb99caebe..ca0a324dd 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -55,12 +55,17 @@ extern "C" {
+ 
+ /* enum isa_base */
+ extern const char* loongarch_isa_base_strings[];
++
+ /* LoongArch V1.00.  */
+ #define ISA_BASE_LA64V100 0
+ /* LoongArch V1.10.  */
+ #define ISA_BASE_LA64V110 1
+ #define N_ISA_BASE_TYPES 2
+ 
++/* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
++   we cannot use the C++ header options.h in loongarch-def.c.  */
++extern int64_t loongarch_isa_base_features[];
++
+ /* enum isa_ext_* */
+ extern const char* loongarch_isa_ext_strings[];
+ #define ISA_EXT_NONE 0
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index f10a9d3ff..390720479 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -284,6 +284,9 @@ config_target_isa:
+   /* Get default ISA from "-march" or its default value.  */
+   t.isa = loongarch_cpu_default_isa[t.cpu_arch];
+ 
++  if (t.cpu_arch != CPU_NATIVE)
++    t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
++
+   /* Apply incremental changes.  */
+   /* "-march=native" overrides the default FPU type.  */
+ 
+-- 
+2.43.0
+
_service:tar_scm:0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
Added
@@ -0,0 +1,156 @@ +From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 16 Nov 2023 09:21:47 +0800 +Subject: PATCH 037/188 LoongArch: Take the advantage of -mdiv32 if it's + enabled + +With -mdiv32, we can assume div.wu and mod.wu works on low 32 bits +of a 64-bit GPR even if it's not sign-extended. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (DIV): New mode iterator. + (<optab:ANY_DIV><mode:GPR>3): Don't expand if TARGET_DIV32. + (<optab:ANY_DIV>di3_fake): Disable if TARGET_DIV32. + (*<optab:ANY_DIV><mode:GPR>3): Allow SImode if TARGET_DIV32. + (<optab:ANY_DIV>si3_extended): New insn if TARGET_DIV32. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/div-div32.c: New test. + * gcc.target/loongarch/div-no-div32.c: New test. +--- + gcc/config/loongarch/loongarch.md | 31 ++++++++++++++++--- + .../gcc.target/loongarch/div-div32.c | 31 +++++++++++++++++++ + .../gcc.target/loongarch/div-no-div32.c | 11 +++++++ + 3 files changed, 68 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 52e40a208..c4e7af107 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -408,6 +408,10 @@ + ;; st.w. + (define_mode_iterator ST_ANY QHWD ANYF) + ++;; A mode for anything legal as a input of a div or mod instruction. ++(define_mode_iterator DIV (DI "TARGET_64BIT") ++ (SI "!TARGET_64BIT || TARGET_DIV32")) ++ + ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d (SI "w") (DI "d")) +@@ -914,7 +918,7 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT && !TARGET_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -934,9 +938,9 @@ + }) + + (define_insn "*<optab><mode>3" +- (set (match_operand:X 0 "register_operand" "=r,&r,&r") +- (any_div:X (match_operand:X 1 "register_operand" "r,r,0") +- (match_operand:X 2 "register_operand" "r,r,r"))) ++ (set (match_operand:DIV 0 "register_operand" "=r,&r,&r") ++ (any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0") ++ (match_operand:DIV 2 "register_operand" "r,r,r"))) + "" + { + return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands); +@@ -949,6 +953,23 @@ + (const_string "yes") + (const_string "no")))) + ++(define_insn "<optab>si3_extended" ++ (set (match_operand:DI 0 "register_operand" "=r,&r,&r") ++ (sign_extend ++ (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") ++ (match_operand:SI 2 "register_operand" "r,r,r")))) ++ "TARGET_64BIT && TARGET_DIV32" ++{ ++ return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); ++} ++ (set_attr "type" "idiv") ++ (set_attr "mode" "SI") ++ (set (attr "enabled") ++ (if_then_else ++ (match_test "!!which_alternative == loongarch_check_zero_div_p()") ++ (const_string "yes") ++ (const_string "no")))) ++ + (define_insn "<optab>di3_fake" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r") + (sign_extend:DI +@@ -957,7 +978,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0) + UNSPEC_FAKE_ANY_DIV))) +- "TARGET_64BIT" ++ "TARGET_64BIT && !TARGET_DIV32" + { + return 
loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c +new file mode 100644 +index 000000000..8b1f686ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */ ++ ++int ++divw (long a, long b) ++{ ++ return (int)a / (int)b; ++} ++ ++unsigned int ++divwu (long a, long b) ++{ ++ return (unsigned int)a / (unsigned int)b; ++} ++ ++int ++modw (long a, long b) ++{ ++ return (int)a % (int)b; ++} ++ ++unsigned int ++modwu (long a, long b) ++{ ++ return (unsigned int)a % (unsigned int)b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +new file mode 100644 +index 000000000..f0f697ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++ ++/* -mno-div32 should be implied by -march=loongarch64. */ ++/* { dg-final { scan-assembler-times "slli\.w\^\n\*0" 8 } } */ ++ ++#include "div-div32.c" +-- +2.43.0 +
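The payoff, sketched at the source level (this mirrors the new div-div32.c test above; the commented instructions describe the intent, not a guaranteed sequence):

/* With -O2 -march=loongarch64 -mabi=lp64d -mdiv32 the division below
   becomes a single div.w on the low 32 bits of the GPRs; without
   -mdiv32, GCC must first sign-extend both inputs (the slli.w ...,0
   sequence the tests scan for) and go through <optab>di3_fake.  */
int
div32_example (long a, long b)
{
  return (int) a / (int) b;
}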
_service:tar_scm:0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
Added
@@ -0,0 +1,61 @@
+From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 16 Nov 2023 09:30:14 +0800
+Subject: [PATCH 038/188] LoongArch: Don't emit dbar 0x700 if -mld-seq-sa
+
+This option (CPUCFG word 0x3 bit 23) means "the hardware guarantees that
+two loads on the same address won't be reordered with each other".  Thus
+we can omit the "load-load" barrier dbar 0x700.
+
+This is only a micro-optimization because dbar 0x700 is already treated
+as a nop if the hardware supports LD_SEQ_SA.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand): Don't
+	print dbar 0x700 if TARGET_LD_SEQ_SA.
+	* config/loongarch/sync.md (atomic_load<mode>): Likewise.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ gcc/config/loongarch/sync.md      | 9 +++++----
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 8bd46da62..c86b787c4 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+       if (loongarch_cas_failure_memorder_needs_acquire (
+ 	    memmodel_from_int (INTVAL (op))))
+ 	fputs ("dbar\t0b10100", file);
+-      else
++      else if (!TARGET_LD_SEQ_SA)
+ 	fputs ("dbar\t0x700", file);
+       break;
+ 
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index f4673c856..65443c899 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -119,13 +119,14 @@
+     case MEMMODEL_SEQ_CST:
+       return "dbar\t0x11\\n\\t"
+ 	     "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x14\\n\\t";
++	     "dbar\t0x14";
+     case MEMMODEL_ACQUIRE:
+       return "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x14\\n\\t";
++	     "dbar\t0x14";
+     case MEMMODEL_RELAXED:
+-      return "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x700\\n\\t";
++      return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1"
++	     : "ld.<size>\t%0,%1\\n\\t"
++	       "dbar\t0x700";
+ 
+     default:
+       /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
+-- 
+2.43.0
+
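In source terms, a small sketch of the case this patch affects (illustrative function name):

#include <stdint.h>

int32_t
poll_flag (const int32_t *p)
{
  /* Without -mld-seq-sa: ld.w %0,%1; dbar 0x700.
     With -mld-seq-sa (CPUCFG word 0x3 bit 23): just ld.w.  */
  return __atomic_load_n (p, __ATOMIC_RELAXED);
}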
_service:tar_scm:0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
Added
@@ -0,0 +1,208 @@ +From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:13:53 +0000 +Subject: PATCH 039/188 LoongArch: Add fine-grained control for LAM_BH and + LAMCAS + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas): + Add. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-cpu.cc + (ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH + and OPTION_MASK_ISA_LAMCAS. + * config/loongarch/sync.md (atomic_add<mode:SHORT>): Use + TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110. Remove empty + lines from assembly output. + (atomic_exchange<mode>_short): Likewise. + (atomic_exchange<mode:SHORT>): Likewise. + (atomic_fetch_add<mode>_short): Likewise. + (atomic_fetch_add<mode:SHORT>): Likewise. + (atomic_cas_value_strong<mode>_amcas): Use TARGET_LAMCAS instead + of ISA_BASE_IS_LA64V110. + (atomic_compare_and_swap<mode>): Likewise. + (atomic_compare_and_swap<mode:GPR>): Likewise. + (atomic_compare_and_swap<mode:SHORT>): Likewise. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump + status if -mlam-bh and -mlamcas if -fverbose-asm. +--- + gcc/config/loongarch/genopts/isa-evolution.in | 2 ++ + gcc/config/loongarch/loongarch-cpu.cc | 3 ++- + gcc/config/loongarch/loongarch-cpucfg-map.h | 2 ++ + gcc/config/loongarch/loongarch-str.h | 2 ++ + gcc/config/loongarch/loongarch.cc | 2 ++ + gcc/config/loongarch/loongarch.opt | 8 ++++++++ + gcc/config/loongarch/sync.md | 18 +++++++++--------- + 7 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index e58f0d6a1..a6bc3f87f 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,2 +1,4 @@ + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. ++2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. ++2 28 lamcas Support amcas_db.{b/h/w/d} instructions. + 3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 76d66fa55..bbce82c9c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see + initializers! 
*/ + + #define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA) ++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) + + int64_t loongarch_isa_base_featuresN_ISA_BASE_TYPES = { + /* ISA_BASE_LA64V100 = */ 0, +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +index 0c078c397..02ff16712 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -30,6 +30,8 @@ static constexpr struct { + HOST_WIDE_INT isa_evolution_bit; + } cpucfg_map = { + { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, ++ { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, ++ { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, + { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, + }; + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index cd9dbb41b..0fee9abe5 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + + #define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" + #define OPTSTR_LD_SEQ_SA "ld-seq-sa" + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c86b787c4..33d23a731 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void) + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings la_target.isa.base); + DUMP_FEATURE (TARGET_DIV32); ++ DUMP_FEATURE (TARGET_LAM_BH); ++ DUMP_FEATURE (TARGET_LAMCAS); + DUMP_FEATURE (TARGET_LD_SEQ_SA); + } + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 5251f705d..ea0d5bb4e 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -267,6 +267,14 @@ mdiv32 + Target Mask(ISA_DIV32) Var(isa_evolution) + Support div.wu and mod.wu instructions with inputs not sign-extended. + ++mlam-bh ++Target Mask(ISA_LAM_BH) Var(isa_evolution) ++Support am{swap/add}_db.{b/h} instructions. ++ ++mlamcas ++Target Mask(ISA_LAMCAS) Var(isa_evolution) ++Support amcas_db.{b/h/w/d} instructions. ++ + mld-seq-sa + Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) + Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 65443c899..a678e7131 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -124,7 +124,7 @@ + return "ld.<size>\t%0,%1\\n\\t" + "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t" ++ return TARGET_LD_SEQ_SA ? 
"ld.<size>\t%0,%1" + : "ld.<size>\t%0,%1\\n\\t" + "dbar\t0x700"; + +@@ -193,7 +193,7 @@ + (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A2.<amo>\t$zero,%z1,%0" + (set (attr "length") (const_int 4))) + +@@ -230,7 +230,7 @@ + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand" "r")) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amswap%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -266,7 +266,7 @@ + (match_operand:QHWD 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAMCAS" + "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" + (set (attr "length") (const_int 8))) + +@@ -296,7 +296,7 @@ + + operands6 = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -422,7 +422,7 @@ + + operands6 = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -642,7 +642,7 @@ + (match_operand:SHORT 2 "register_operand")) + "" + { +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAM_BH) + emit_insn (gen_atomic_exchange<mode>_short (operands0, operands1, operands2, operands3)); + else + { +@@ -663,7 +663,7 @@ + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -678,7 +678,7 @@ + UNSPEC_SYNC_OLD_OP)) + ""
_service:tar_scm:0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
Added
@@ -0,0 +1,50 @@
+From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 19 Nov 2023 01:41:12 +0800
+Subject: [PATCH 040/188] LoongArch: Fix "-mexplict-relocs=none
+ -mcmodel=medium" producing %call36 when the assembler does not support it
+
+Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still
+return false when -mexplicit-relocs=none -mcmodel=medium to make
+loongarch_legitimize_call_address emit la.local or la.global.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (const_call_insn_operand):
+	Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions.  Change "1" to
+	"true" to make the coding style consistent.
+---
+ gcc/config/loongarch/predicates.md | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 2aae87db4..30a0dee9f 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -444,21 +444,19 @@
+     case SYMBOL_PCREL:
+       if (TARGET_CMODEL_EXTREME
+ 	  || (TARGET_CMODEL_MEDIUM
+-	      && HAVE_AS_SUPPORT_CALL36
+ 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
+ 	return false;
+       else
+-	return 1;
++	return true;
+ 
+     case SYMBOL_GOT_DISP:
+       if (TARGET_CMODEL_EXTREME
+ 	  || !flag_plt
+ 	  || (flag_plt && TARGET_CMODEL_MEDIUM
+-	      && HAVE_AS_SUPPORT_CALL36
+ 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
+ 	return false;
+       else
+-	return 1;
++	return true;
+ 
+     default:
+       return false;
+-- 
+2.43.0
+
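The scenario being fixed, sketched in C (hypothetical symbol; the point is only which call sequence gets selected):

/* With -mexplicit-relocs=none -mcmodel=medium and an assembler that
   lacks %call36, const_call_insn_operand now rejects the symbol, so
   loongarch_legitimize_call_address loads the address with
   la.local/la.global and calls through a register instead of emitting
   a %call36 relocation the assembler cannot handle.  */
extern void far_away (void);

void
caller (void)
{
  far_away ();
}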
_service:tar_scm:0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
Added
@@ -0,0 +1,43 @@
+From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Sat, 18 Nov 2023 11:04:42 +0800
+Subject: [PATCH 041/188] LoongArch: Modify MUSL_DYNAMIC_LINKER.
+
+Use no suffix at all in the musl dynamic linker name for the hard
+float ABI.  Use the -sf and -sp suffixes in the musl dynamic linker
+name for the soft float and single precision ABIs.  The following
+table maps each LoongArch64 ABI name to its musl interpreter name.
+
+musl interpreter            | LoongArch64 ABI
+--------------------------- | -----------------
+ld-musl-loongarch64.so.1    | loongarch64-lp64d
+ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f
+ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s
+
+gcc/ChangeLog:
+
+	* config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix.
+---
+ gcc/config/loongarch/gnu-user.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 60ef75601..9fc49dc8f 100644
+--- a/gcc/config/loongarch/gnu-user.h
++++ b/gcc/config/loongarch/gnu-user.h
+@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3.  If not see
+   "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
+ 
+ #define MUSL_ABI_SPEC \
+-  "%{mabi=lp64d:-lp64d}" \
+-  "%{mabi=lp64f:-lp64f}" \
+-  "%{mabi=lp64s:-lp64s}"
++  "%{mabi=lp64d:}" \
++  "%{mabi=lp64f:-sp}" \
++  "%{mabi=lp64s:-sf}"
+ 
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER \
+-- 
+2.43.0
+
_service:tar_scm:0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
Added
@@ -0,0 +1,85 @@
+From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 21 Nov 2023 09:09:25 +0800
+Subject: [PATCH 042/188] LoongArch: Fix libgcc build failure when libc is not
+ available
+
+To use int64_t we included <stdint.h> in loongarch-def.h.
+Unfortunately, loongarch-def.h is also used by libgcc etc., causing a
+build failure when building a "stage1" cross compiler at which point
+the target libc is not built yet.
+
+As int64_t is used as a C-compatible replacement for HOST_WIDE_INT,
+it's not directly or indirectly referred to by the target libraries.
+So guard everything requiring stdint.h with #if so it won't block the
+target libraries.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h (stdint.h): Guard with #if to
+	exclude it for target libraries.
+	(loongarch_isa_base_features): Likewise.
+	(loongarch_isa): Likewise.
+	(loongarch_abi): Likewise.
+	(loongarch_target): Likewise.
+	(loongarch_cpu_default_isa): Likewise.
+---
+ gcc/config/loongarch/loongarch-def.h | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index ca0a324dd..ef848f606 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3.  If not see
+ #ifndef LOONGARCH_DEF_H
+ #define LOONGARCH_DEF_H
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ #include <stdint.h>
++#endif
++
+ #include "loongarch-tune.h"
+ 
+ #ifdef __cplusplus
+@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings[];
+ #define ISA_BASE_LA64V110 1
+ #define N_ISA_BASE_TYPES 2
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ /* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
+    we cannot use the C++ header options.h in loongarch-def.c.  */
+ extern int64_t loongarch_isa_base_features[];
++#endif
+ 
+ /* enum isa_ext_* */
+ extern const char* loongarch_isa_ext_strings[];
+@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings[];
+ #define M_OPT_ABSENT(opt_enum)  ((opt_enum) == M_OPT_UNSET)
+ 
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ /* Internal representation of the target.  */
+ struct loongarch_isa
+ {
+@@ -150,6 +156,9 @@ struct loongarch_target
+   int cmodel;	   /* CMODEL_ */
+ };
+ 
++extern struct loongarch_isa loongarch_cpu_default_isa[];
++#endif
++
+ /* CPU properties.  */
+ /* index */
+ #define CPU_NATIVE	  0
+@@ -162,7 +171,6 @@ struct loongarch_target
+ 
+ /* parallel tables.  */
+ extern const char* loongarch_cpu_strings[];
+-extern struct loongarch_isa loongarch_cpu_default_isa[];
+ extern int loongarch_cpu_issue_rate[];
+ extern int loongarch_cpu_multipass_dfa_lookahead[];
+ 
+-- 
+2.43.0
+
_service:tar_scm:0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
Added
@@ -0,0 +1,148 @@ +From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 06:12:22 +0800 +Subject: PATCH 043/188 LoongArch: Optimize LSX vector shuffle on + floating-point vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The vec_perm expander was wrongly defined. GCC internal says: + +Operand 3 is the “selector”. It is an integral mode vector of the same +width and number of elements as mode M. + +But we made operand 3 in the same mode as the shuffled vectors, so it +would be a FP mode vector if the shuffled vectors are FP mode. + +With this mistake, the generic code manages to work around and it ends +up creating some very nasty code for a simple __builtin_shuffle (a, b, +c) where a and b are V4SF, c is V4SI: + + la.local $r12,.LANCHOR0 + la.local $r13,.LANCHOR1 + vld $vr1,$r12,48 + vslli.w $vr1,$vr1,2 + vld $vr2,$r12,16 + vld $vr0,$r13,0 + vld $vr3,$r13,16 + vshuf.b $vr0,$vr1,$vr1,$vr0 + vld $vr1,$r12,32 + vadd.b $vr0,$vr0,$vr3 + vandi.b $vr0,$vr0,31 + vshuf.b $vr0,$vr1,$vr2,$vr0 + vst $vr0,$r12,0 + jr $r1 + +This is obviously stupid. Fix the expander definition and adjust +loongarch_expand_vec_perm to handle it correctly. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the + selector VIMODE. + * config/loongarch/loongarch.cc (loongarch_expand_vec_perm): + Use the mode of the selector (instead of the shuffled vector) + for truncating it. Operate on subregs in the selector mode if + the shuffled vector has a different mode (i. e. it's a + floating-point vector). + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-shuf-fp.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 18 ++++++++++-------- + gcc/config/loongarch/lsx.md | 2 +- + .../gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 33d23a731..d95ac68e8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8603,8 +8603,9 @@ void + loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + { + machine_mode vmode = GET_MODE (target); ++ machine_mode vimode = GET_MODE (sel); + auto nelt = GET_MODE_NUNITS (vmode); +- auto round_reg = gen_reg_rtx (vmode); ++ auto round_reg = gen_reg_rtx (vimode); + rtx round_dataMAX_VECT_LEN; + + for (int i = 0; i < nelt; i += 1) +@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + round_datai = GEN_INT (0x1f); + } + +- rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data)); ++ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data)); + emit_move_insn (round_reg, round_data_rtx); + ++ if (vmode != vimode) ++ { ++ target = lowpart_subreg (vimode, target, vmode); ++ op0 = lowpart_subreg (vimode, op0, vmode); ++ op1 = lowpart_subreg (vimode, op1, vmode); ++ } ++ + switch (vmode) + { + case E_V16QImode: +@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); + break; + case E_V2DFmode: +- emit_insn (gen_andv2di3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); +- break; + case E_V2DImode: + emit_insn (gen_andv2di3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_d 
(target, sel, op1, op0)); + break; + case E_V4SFmode: +- emit_insn (gen_andv4si3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0)); +- break; + case E_V4SImode: + emit_insn (gen_andv4si3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 8ea41c85b..5e8d8d74b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -837,7 +837,7 @@ + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "register_operand") + (match_operand:LSX 2 "register_operand") +- (match_operand:LSX 3 "register_operand") ++ (match_operand:<VIMODE> 3 "register_operand") + "ISA_HAS_LSX" + { + loongarch_expand_vec_perm (operands0, operands1, +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +new file mode 100644 +index 000000000..7acc2113a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++/* { dg-final { scan-assembler "vshuf\.w" } } */ ++ ++#define V __attribute__ ((vector_size (16))) ++ ++int a V; ++float b V; ++float c V; ++float d V; ++ ++void ++test (void) ++{ ++ d = __builtin_shuffle (b, c, a); ++} +-- +2.43.0 +
_service:tar_scm:0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
Added
@@ -0,0 +1,112 @@ +From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001 +From: Guo Jie <guojie@loongson.cn> +Date: Thu, 23 Nov 2023 11:04:17 +0800 +Subject: PATCH 044/188 LoongArch: Optimize the loading of immediate numbers + with the same high and low 32-bit values + +For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c: + + long long r = 0x0101010101010101; + +Before this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + lu32i.d $r15,0x1010100000000>>32 + lu52i.d $r15,$r15,0x100000000000000>>52 + +After this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + bstrins.d $r15,$r15,63,32 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (enum loongarch_load_imm_method): Add new method. + (loongarch_build_integer): Add relevant implementations for + new method. + (loongarch_move_integer): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/imm-load1.c: Change old check. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++- + .../gcc.target/loongarch/imm-load1.c | 3 ++- + 2 files changed, 23 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d95ac68e8..048d3802b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -142,12 +142,16 @@ struct loongarch_address_info + + METHOD_LU52I: + Load 52-63 bit of the immediate number. ++ ++ METHOD_MIRROR: ++ Copy 0-31 bit of the immediate number to 32-63bit. + */ + enum loongarch_load_imm_method + { + METHOD_NORMAL, + METHOD_LU32I, +- METHOD_LU52I ++ METHOD_LU52I, ++ METHOD_MIRROR + }; + + struct loongarch_integer_op +@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes, + + int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; + int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; ++ ++ uint32_t hival = (uint32_t) (value >> 32); ++ uint32_t loval = (uint32_t) value; ++ + /* Determine whether the upper 32 bits are sign-extended from the lower + 32 bits. If it is, the instructions to load the high order can be + ommitted. */ + if (lu32isign31 && lu52isign31) + return cost; ++ /* If the lower 32 bits are the same as the upper 32 bits, just copy ++ the lower 32 bits to the upper 32 bits. */ ++ else if (loval == hival) ++ { ++ codescost.method = METHOD_MIRROR; ++ codescost.curr_value = value; ++ return cost + 1; ++ } + /* Determine whether bits 32-51 are sign-extended from the lower 32 + bits. If so, directly load 52-63 bits. */ + else if (lu32isign31) +@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) + gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)), + GEN_INT (codesi.value)); + break; ++ case METHOD_MIRROR: ++ gcc_assert (mode == DImode); ++ emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x)); ++ break; + default: + gcc_unreachable (); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +index 2ff029712..f64cc2956 100644 +--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c ++++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */ + + + extern long long b10; +-- +2.43.0 +
_service:tar_scm:0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
Added
@@ -0,0 +1,30 @@
+From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Thu, 23 Nov 2023 11:05:56 +0800
+Subject: [PATCH 045/188] LoongArch: Fix runtime error in a gcc build with
+ --with-build-config=bootstrap-ubsan
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_split_plus_constant):
+	Avoid left shift of the negative value -0x8000.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 048d3802b..ecceca22d 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
+   else if (loongarch_addu16i_imm12_operand_p (v, mode))
+     a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+   else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+-    a = (v > 0 ? 0x7fff : -0x8000) << 16;
++    a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
+   else
+     gcc_unreachable ();
+ 
+-- 
+2.43.0
+
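The underlying C rule, as a standalone illustration (a sketch of the reasoning, assuming a C/C++ dialect where left-shifting a negative value is undefined, which is what bootstrap-ubsan diagnoses):

#include <stdio.h>

int
main (void)
{
  /* (-0x8000) << 16 left-shifts a negative value: undefined behavior,
     flagged by -fsanitize=shift.  The complement form below computes
     the same value (-0x80000000) without any shift.  */
  long long a = ~0x7fffffffLL;
  printf ("%lld\n", a);   /* -2147483648 */
  return 0;
}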
_service:tar_scm:0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
Added
@@ -0,0 +1,1295 @@ +From d37308b7a62246e16ee61c40441548feb76761f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 18 Nov 2023 04:48:20 +0800 +Subject: PATCH 046/188 LoongArch: Fix usage of LSX and LASX frint/ftint + instructions PR112578 + +The usage LSX and LASX frint/ftint instructions had some problems: + +1. These instructions raises FE_INEXACT, which is not allowed with + -fno-fp-int-builtin-inexact for most C2x section F.10.6 functions + (the only exceptions are rint, lrint, and llrint). +2. The "frint" instruction without explicit rounding mode is used for + roundM2, this is incorrect because roundM2 is defined "rounding + operand 1 to the *nearest* integer, rounding away from zero in the + event of a tie". We actually don't have such an instruction. Our + frintrne instruction is roundevenM2 (unfortunately, this is not + documented). +3. These define_insn's are written in a way not so easy to hack. + +So I removed these instructions and created a "simd.md" file, then added +them and the corresponding expanders there. The advantage of the +simd.md file is we don't need to duplicate the RTL template twice (in +lsx.md and lasx.md). + +gcc/ChangeLog: + + PR target/112578 + * config/loongarch/lsx.md (UNSPEC_LSX_VFTINT_S, + UNSPEC_LSX_VFTINTRNE, UNSPEC_LSX_VFTINTRP, + UNSPEC_LSX_VFTINTRM, UNSPEC_LSX_VFRINTRNE_S, + UNSPEC_LSX_VFRINTRNE_D, UNSPEC_LSX_VFRINTRZ_S, + UNSPEC_LSX_VFRINTRZ_D, UNSPEC_LSX_VFRINTRP_S, + UNSPEC_LSX_VFRINTRP_D, UNSPEC_LSX_VFRINTRM_S, + UNSPEC_LSX_VFRINTRM_D): Remove. + (ILSX, FLSX): Move into ... + (VIMODE): Move into ... + (FRINT_S, FRINT_D): Remove. + (frint_pattern_s, frint_pattern_d, frint_suffix): Remove. + (lsx_vfrint_<flsxfmt>, lsx_vftint_s_<ilsxfmt>_<flsxfmt>, + lsx_vftintrne_w_s, lsx_vftintrne_l_d, lsx_vftintrp_w_s, + lsx_vftintrp_l_d, lsx_vftintrm_w_s, lsx_vftintrm_l_d, + lsx_vfrintrne_s, lsx_vfrintrne_d, lsx_vfrintrz_s, + lsx_vfrintrz_d, lsx_vfrintrp_s, lsx_vfrintrp_d, + lsx_vfrintrm_s, lsx_vfrintrm_d, + <FRINT_S:frint_pattern_s>v4sf2, + <FRINT_D:frint_pattern_d>v2df2, round<mode>2, + fix_trunc<mode>2): Remove. + * config/loongarch/lasx.md: Likewise. + * config/loongarch/simd.md: New file. + (ILSX, ILASX, FLSX, FLASX, VIMODE): ... here. + (IVEC, FVEC): New mode iterators. + (VIMODE): ... here. Extend it to work for all LSX/LASX vector + modes. + (x, wu, simd_isa, WVEC, vimode, simdfmt, simdifmt_for_f, + elebits): New mode attributes. + (UNSPEC_SIMD_FRINTRP, UNSPEC_SIMD_FRINTRZ, UNSPEC_SIMD_FRINT, + UNSPEC_SIMD_FRINTRM, UNSPEC_SIMD_FRINTRNE): New unspecs. + (SIMD_FRINT): New int iterator. + (simd_frint_rounding, simd_frint_pattern): New int attributes. + (<simd_isa>_<x>vfrint<simd_frint_rounding>_<simdfmt>): New + define_insn template for frint instructions. + (<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>): + Likewise, but for ftint instructions. + (<simd_frint_pattern><mode>2): New define_expand with + flag_fp_int_builtin_inexact checked. + (l<simd_frint_pattern><mode><vimode>2): Likewise. + (ftrunc<mode>2): New define_expand. It does not require + flag_fp_int_builtin_inexact. + (fix_trunc<mode><vimode>2): New define_insn_and_split. It does + not require flag_fp_int_builtin_inexact. + (include): Add lsx.md and lasx.md. + * config/loongarch/loongarch.md (include): Include simd.md, + instead of including lsx.md and lasx.md directly. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vftint_w_s, CODE_FOR_lsx_vftint_l_d, + CODE_FOR_lasx_xvftint_w_s, CODE_FOR_lasx_xvftint_l_d): + Remove. 
+ +gcc/testsuite/ChangeLog: + + PR target/112578 + * gcc.target/loongarch/vect-frint.c: New test. + * gcc.target/loongarch/vect-frint-no-inexact.c: New test. + * gcc.target/loongarch/vect-ftint.c: New test. + * gcc.target/loongarch/vect-ftint-no-inexact.c: New test. +--- + gcc/config/loongarch/lasx.md | 239 ----------------- + gcc/config/loongarch/loongarch-builtins.cc | 4 - + gcc/config/loongarch/loongarch.md | 7 +- + gcc/config/loongarch/lsx.md | 243 ------------------ + gcc/config/loongarch/simd.md | 213 +++++++++++++++ + .../loongarch/vect-frint-no-inexact.c | 48 ++++ + .../gcc.target/loongarch/vect-frint.c | 85 ++++++ + .../loongarch/vect-ftint-no-inexact.c | 44 ++++ + .../gcc.target/loongarch/vect-ftint.c | 83 ++++++ + 9 files changed, 475 insertions(+), 491 deletions(-) + create mode 100644 gcc/config/loongarch/simd.md + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 2e11f0612..d4a56c307 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -53,7 +53,6 @@ + UNSPEC_LASX_XVFCMP_SULT + UNSPEC_LASX_XVFCMP_SUN + UNSPEC_LASX_XVFCMP_SUNE +- UNSPEC_LASX_XVFTINT_S + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -92,12 +91,6 @@ + UNSPEC_LASX_XVEXTRINS + UNSPEC_LASX_XVMSKLTZ + UNSPEC_LASX_XVSIGNCOV +- UNSPEC_LASX_XVFTINTRNE_W_S +- UNSPEC_LASX_XVFTINTRNE_L_D +- UNSPEC_LASX_XVFTINTRP_W_S +- UNSPEC_LASX_XVFTINTRP_L_D +- UNSPEC_LASX_XVFTINTRM_W_S +- UNSPEC_LASX_XVFTINTRM_L_D + UNSPEC_LASX_XVFTINT_W_D + UNSPEC_LASX_XVFFINT_S_L + UNSPEC_LASX_XVFTINTRZ_W_D +@@ -116,14 +109,6 @@ + UNSPEC_LASX_XVFTINTRML_L_S + UNSPEC_LASX_XVFTINTRNEL_L_S + UNSPEC_LASX_XVFTINTRNEH_L_S +- UNSPEC_LASX_XVFRINTRNE_S +- UNSPEC_LASX_XVFRINTRNE_D +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRM_S +- UNSPEC_LASX_XVFRINTRM_D + UNSPEC_LASX_XVREPLVE0_Q + UNSPEC_LASX_XVPERM_W + UNSPEC_LASX_XVPERMI_Q +@@ -206,9 +191,6 @@ + ;; Only used for copy256_{u,s}.w. + (define_mode_iterator LASX_W V8SI V8SF) + +-;; Only integer modes in LASX. +-(define_mode_iterator ILASX V4DI V8SI V16HI V32QI) +- + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH V4DI V8SI V16HI) + +@@ -224,9 +206,6 @@ + ;; Only integer modes smaller than a word. + (define_mode_iterator ILASX_HB V16HI V32QI) + +-;; Only floating-point modes in LASX. +-(define_mode_iterator FLASX V4DF V8SF) +- + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W V8SI V16HI V32QI V8SF) + +@@ -500,37 +479,6 @@ + (V16HI "w") + (V32QI "w")) + +-(define_int_iterator FRINT256_S UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_S) +- +-(define_int_iterator FRINT256_D UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_D) +- +-(define_int_attr frint256_pattern_s +- (UNSPEC_LASX_XVFRINTRP_S "ceil") +- (UNSPEC_LASX_XVFRINTRZ_S "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_S "floor")) +- +-(define_int_attr frint256_pattern_d +- (UNSPEC_LASX_XVFRINTRP_D "ceil") +- (UNSPEC_LASX_XVFRINTRZ_D "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_D "floor")) +- +-(define_int_attr frint256_suffix +- (UNSPEC_LASX_XVFRINTRP_S "rp") +- (UNSPEC_LASX_XVFRINTRP_D "rp") +- (UNSPEC_LASX_XVFRINTRZ_S "rz") +- (UNSPEC_LASX_XVFRINTRZ_D "rz") +- (UNSPEC_LASX_XVFRINT "") +- (UNSPEC_LASX_XVFRINTRM_S "rm") +- (UNSPEC_LASX_XVFRINTRM_D "rm")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LASX 0 "register_operand")
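The semantic distinction in item 2 of the description above can be checked with a minimal scalar program. This is a sketch for illustration only (not part of the patch; it assumes a libm that provides the C23 roundeven function):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* round ties away from zero; frint (ties to even) cannot implement it.  */
  printf ("round (2.5)     = %g\n", round (2.5));      /* 3 */
  /* roundeven ties to even; this is what frintrne computes.  */
  printf ("roundeven (2.5) = %g\n", roundeven (2.5));  /* 2 */
  /* rint follows the current rounding mode (ties to even by default) and
     may raise FE_INEXACT, which F.10.6 permits for rint/lrint/llrint.  */
  printf ("rint (2.5)      = %g\n", rint (2.5));       /* 2 */
  return 0;
}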
View file
_service:tar_scm:0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,268 @@ +From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 16:28:59 +0800 +Subject: PATCH 047/188 LoongArch: Use standard pattern name and RTX code for + LSX/LASX muh instructions + +Removes unnecessary UNSPECs and make the muh instructions useful with +GNU vectors or auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/simd.md (muh): New code attribute mapping + any_extend to smul_highpart or umul_highpart. + (<su>mul<mode>3_highpart): New define_insn. + * config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove. + (UNSPEC_LSX_VMUH_U): Remove. + (lsx_vmuh_s_<lsxfmt>): Remove. + (lsx_vmuh_u_<lsxfmt>): Remove. + * config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove. + (UNSPEC_LASX_XVMUH_U): Remove. + (lasx_xvmuh_s_<lasxfmt>): Remove. + (lasx_xvmuh_u_<lasxfmt>): Remove. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b): + Redefine to standard pattern name. + (CODE_FOR_lsx_vmuh_h): Likewise. + (CODE_FOR_lsx_vmuh_w): Likewise. + (CODE_FOR_lsx_vmuh_d): Likewise. + (CODE_FOR_lsx_vmuh_bu): Likewise. + (CODE_FOR_lsx_vmuh_hu): Likewise. + (CODE_FOR_lsx_vmuh_wu): Likewise. + (CODE_FOR_lsx_vmuh_du): Likewise. + (CODE_FOR_lasx_xvmuh_b): Likewise. + (CODE_FOR_lasx_xvmuh_h): Likewise. + (CODE_FOR_lasx_xvmuh_w): Likewise. + (CODE_FOR_lasx_xvmuh_d): Likewise. + (CODE_FOR_lasx_xvmuh_bu): Likewise. + (CODE_FOR_lasx_xvmuh_hu): Likewise. + (CODE_FOR_lasx_xvmuh_wu): Likewise. + (CODE_FOR_lasx_xvmuh_du): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-muh.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 32 ++++++++--------- + gcc/config/loongarch/lsx.md | 22 ------------ + gcc/config/loongarch/simd.md | 16 +++++++++ + gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++ + 5 files changed, 68 insertions(+), 60 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index d4a56c307..023a023b4 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -68,8 +68,6 @@ + UNSPEC_LASX_BRANCH + UNSPEC_LASX_BRANCH_V + +- UNSPEC_LASX_XVMUH_S +- UNSPEC_LASX_XVMUH_U + UNSPEC_LASX_MXVEXTW_U + UNSPEC_LASX_XVSLLWIL_S + UNSPEC_LASX_XVSLLWIL_U +@@ -2823,26 +2821,6 @@ + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvmuh_s_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVMUH_S)) +- "ISA_HAS_LASX" +- "xvmuh.<lasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lasx_xvmuh_u_<lasxfmt_u>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVMUH_U)) +- "ISA_HAS_LASX" +- "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>" + (set (match_operand:<VDMODE256> 0 "register_operand" "=f") + (unspec:<VDMODE256> (match_operand:ILASX_WHB 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index fb458feac..41ea357cf 100644 +--- 
a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3 + #define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3 + #define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3 ++#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart ++#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart + #define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3 + #define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3 + #define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3 +@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4 + #define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4 + +-#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b +-#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h +-#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w +-#define CODE_FOR_lsx_vmuh_d CODE_FOR_lsx_vmuh_s_d +-#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu +-#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu +-#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu +-#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du + #define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b + #define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h + #define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w +@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3 + #define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3 + #define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3 ++#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart ++#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart + #define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2 + #define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2 + #define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2 +@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu + #define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu + #define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du +-#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b +-#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h +-#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w +-#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d +-#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu +-#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu +-#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu +-#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du + #define CODE_FOR_lasx_xvssran_b_h CODE_FOR_lasx_xvssran_s_b_h + #define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w + #define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c1c3719e3..537afaf96 100644 +--- 
a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -64,8 +64,6 @@ + UNSPEC_LSX_VSRLR + UNSPEC_LSX_VSRLRI + UNSPEC_LSX_VSHUF +- UNSPEC_LSX_VMUH_S +- UNSPEC_LSX_VMUH_U + UNSPEC_LSX_VEXTW_S + UNSPEC_LSX_VEXTW_U + UNSPEC_LSX_VSLLWIL_S +@@ -2506,26 +2504,6 @@ + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vmuh_s_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VMUH_S)) +- "ISA_HAS_LSX" +- "vmuh.<lsxfmt>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lsx_vmuh_u_<lsxfmt_u>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VMUH_U)) +- "ISA_HAS_LSX" +- "vmuh.<lsxfmt_u>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vextw_s_d" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V4SI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 27d1ffecd..a0e8db3c0 100644 +--- a/gcc/config/loongarch/simd.md
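What removing the UNSPECs buys in practice: the high-part multiply is now exposed through the standard <su>mul<mode>3_highpart pattern, so ordinary C can reach vmuh/xvmuh without intrinsics. A sketch in the spirit of the new vect-muh.c test (the function name and loop shape are illustrative); with -O3 -mlsx the loop is expected to vectorize to vmuh.w:

/* High 32 bits of a signed 32x32 multiply, element-wise.  */
void
mulh_loop (int *restrict r, const int *restrict a,
           const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = (int) (((long long) a[i] * b[i]) >> 32);
}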
View file
_service:tar_scm:0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,285 @@ +From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 17:28:06 +0800 +Subject: PATCH 048/188 LoongArch: Use standard pattern name and RTX code for + LSX/LASX rotate shift + +Remove unnecessary UNSPECs and make the xvrotri instructions useful +with GNU vectors and auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (bitimm): Move to ... + (UNSPEC_LSX_VROTR): Remove. + (lsx_vrotr_<lsxfmt>): Remove. + (lsx_vrotri_<lsxfmt>): Remove. + * config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove. + (lsx_vrotr_<lsxfmt>): Remove. + (lsx_vrotri_<lsxfmt>): Remove. + * config/loongarch/simd.md (bitimm): ... here. Expand it to + cover LASX modes. + (vrotr<mode>3): New define_insn. + (vrotri<mode>3): New define_insn. + * config/loongarch/loongarch-builtins.cc: + (CODE_FOR_lsx_vrotr_b): Use standard pattern name. + (CODE_FOR_lsx_vrotr_h): Likewise. + (CODE_FOR_lsx_vrotr_w): Likewise. + (CODE_FOR_lsx_vrotr_d): Likewise. + (CODE_FOR_lasx_xvrotr_b): Likewise. + (CODE_FOR_lasx_xvrotr_h): Likewise. + (CODE_FOR_lasx_xvrotr_w): Likewise. + (CODE_FOR_lasx_xvrotr_d): Likewise. + (CODE_FOR_lsx_vrotri_b): Define to standard pattern name. + (CODE_FOR_lsx_vrotri_h): Likewise. + (CODE_FOR_lsx_vrotri_w): Likewise. + (CODE_FOR_lsx_vrotri_d): Likewise. + (CODE_FOR_lasx_xvrotri_b): Likewise. + (CODE_FOR_lasx_xvrotri_h): Likewise. + (CODE_FOR_lasx_xvrotri_w): Likewise. + (CODE_FOR_lasx_xvrotri_d): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-rotr.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 16 +++++++++ + gcc/config/loongarch/lsx.md | 28 --------------- + gcc/config/loongarch/simd.md | 29 +++++++++++++++ + .../gcc.target/loongarch/vect-rotr.c | 36 +++++++++++++++++++ + 5 files changed, 81 insertions(+), 50 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 023a023b4..116b30c07 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -138,7 +138,6 @@ + UNSPEC_LASX_XVHSUBW_Q_D + UNSPEC_LASX_XVHADDW_QU_DU + UNSPEC_LASX_XVHSUBW_QU_DU +- UNSPEC_LASX_XVROTR + UNSPEC_LASX_XVADD_Q + UNSPEC_LASX_XVSUB_Q + UNSPEC_LASX_XVREPLVE +@@ -4232,18 +4231,6 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "V4DI")) + +-;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D +-;;TODO-478 +-(define_insn "lasx_xvrotr_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVROTR)) +- "ISA_HAS_LASX" +- "xvrotr.<lasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + ;;XVADD.Q + ;;TODO2 + (define_insn "lasx_xvadd_q" +@@ -4426,15 +4413,6 @@ + (set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DI")) + +-(define_insn "lasx_xvrotri_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand 2 "const_<bitimm256>_operand" ""))) +- "ISA_HAS_LASX" +- "xvrotri.<lasxfmt>\t%u0,%u1,%2" +- (set_attr "type" "simd_shf") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvextl_q_d" + (set (match_operand:V4DI 0 "register_operand" "=f") + (unspec:V4DI (match_operand:V4DI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc +index 41ea357cf..f4523c8bf 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3 + #define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3 + #define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3 ++#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3 ++#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3 ++#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3 ++#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3 ++#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3 ++#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3 ++#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3 ++#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3 + #define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3 + #define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3 + #define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3 +@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3 + #define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3 + #define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3 ++#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3 ++#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3 ++#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3 ++#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3 ++#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3 ++#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3 ++#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3 ++#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3 + #define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3 + #define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3 + #define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3 +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 537afaf96..232399934 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -141,7 +141,6 @@ + UNSPEC_LSX_VMADDWOD + UNSPEC_LSX_VMADDWOD2 + UNSPEC_LSX_VMADDWOD3 +- UNSPEC_LSX_VROTR + UNSPEC_LSX_VADD_Q + UNSPEC_LSX_VSUB_Q + UNSPEC_LSX_VEXTH_Q_D +@@ -363,14 +362,6 @@ + (V8HI "exp_8") + (V16QI "exp_16")) + +-;; This attribute is used to form an immediate operand constraint using +-;; "const_<bitimm>_operand". 
+-(define_mode_attr bitimm +- (V16QI "uimm3") +- (V8HI "uimm4") +- (V4SI "uimm5") +- (V2DI "uimm6")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "") +@@ -4152,16 +4143,6 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DI")) + +-(define_insn "lsx_vrotr_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VROTR)) +- "ISA_HAS_LSX" +- "vrotr.<lsxfmt>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vadd_q" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V2DI 1 "register_operand" "f") +@@ -4255,15 +4236,6 @@ + (set_attr "type" "simd_fcvt") + (set_attr "mode" "V2DI")) + +-(define_insn "lsx_vrotri_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand 2 "const_<bitimm>_operand" ""))) +- "ISA_HAS_LSX" +- "vrotri.<lsxfmt>\t%w0,%w1,%2" +- (set_attr "type" "simd_shf") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vextl_q_d" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V2DI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index a0e8db3c0..4ecf7a55e 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -91,6 +91,13 @@ + (V8HI "16") (V16HI "16")
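Similarly, the rotate patterns become reachable from the usual C rotate idiom, which GCC already recognizes. A sketch (illustrative only, in the spirit of the new vect-rotr.c test) that should map to vrotri.w once the loop is vectorized with -O3 -mlsx:

/* Rotate each element right by 3; the shift/or pair is the idiom GCC
   folds into a single rotate.  */
void
rotr3 (unsigned *restrict r, const unsigned *restrict a, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = (a[i] >> 3) | (a[i] << 29);
}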
View file
_service:tar_scm:0049-LoongArch-Remove-lrint_allow_inexact.patch
Added
@@ -0,0 +1,42 @@
+From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 01:34:26 +0800
+Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact
+
+No functional change, just a cleanup.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (lrint_allow_inexact): Remove.
+	(<lrint_pattern><ANYF:mode><ANYFI:mode>2): Check if <LRINT>
+	== UNSPEC_FTINT instead of <lrint_allow_inexact>.
+---
+ gcc/config/loongarch/loongarch.md | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index d1c766cbf..11577f407 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -585,9 +585,6 @@
+ (define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+				     (UNSPEC_FTINTRM "rm")
+				     (UNSPEC_FTINTRP "rp")])
+-(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+-				      (UNSPEC_FTINTRM "0")
+-				      (UNSPEC_FTINTRP "0")])
+ 
+ ;; Iterator and attributes for bytepick.d
+ (define_int_iterator bytepick_w_ashift_amount [8 16 24])
+@@ -2384,7 +2381,7 @@
+	(unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+		      LRINT))]
+   "TARGET_HARD_FLOAT &&
+-   (<lrint_allow_inexact>
++   (<LRINT> == UNSPEC_FTINT
+    || flag_fp_int_builtin_inexact
+    || !flag_trapping_math)"
+  "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+-- 
+2.43.0
+
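The rewritten condition encodes the C2x F.10.6 rule already quoted in patch 046: lrint may raise FE_INEXACT unconditionally, while lfloor/lceil expansions may not unless -ffp-int-builtin-inexact (or -fno-trapping-math) is in effect. A hedged sketch of the user-visible effect (function names are illustrative):

long
do_lrint (double x)
{
  /* Expected to stay a single ftint.l.d regardless of the flags:
     lrint is one of the F.10.6 exceptions.  */
  return __builtin_lrint (x);
}

long
do_lfloor (double x)
{
  /* Expected to be a single ftintrm.l.d under the default
     -ffp-int-builtin-inexact, but a floor call plus a conversion
     with -fno-fp-int-builtin-inexact -ftrapping-math.  */
  return (long) __builtin_floor (x);
}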
View file
_service:tar_scm:0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
Added
@@ -0,0 +1,150 @@
+From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 03:51:56 +0800
+Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
+ explicit rounding mode
+
+In the LoongArch base FP ISA there is only the frint.{s/d} instruction,
+which reads the global rounding mode.  Utilize LSX for an explicit
+rounding mode even if the operand is scalar.  It may seem to waste CPU
+power, but it is still much faster than calling the library function.
+
+gcc/ChangeLog:
+
+	* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
+	(VLSX_FOR_FMODE): New mode attribute.
+	(<simd_for_scalar_frint_pattern><mode>2): New expander,
+	expanding to vreplvei.{w/d} + frint{rp/rz/rm/rne}.{s.d}.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-frint-scalar.c: New test.
+	* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
+---
+ gcc/config/loongarch/simd.md                  | 28 ++++++++++++
+ .../loongarch/vect-frint-scalar-no-inexact.c  | 23 ++++++++++
+ .../gcc.target/loongarch/vect-frint-scalar.c  | 43 +++++++++++++++++++
+ 3 files changed, 94 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 4ecf7a55e..843b1a41f 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -169,6 +169,34 @@
+    UNSPEC_SIMD_FRINTRZ))]
+   "")
+ 
++;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
++;; builtin-inexact.  The base FP instruction set lacks these operations.
++;; Yes, we are wasting 50% or even 75% of the CPU horsepower, but it's
++;; still much faster than calling a libc function: on LA464 and LA664
++;; there is a 3x ~ 5x speed up.
++;;
++;; Note that a vreplvei instruction is needed or we'll also operate on the
++;; junk in the high bits of the vector register and produce random FP
++;; exceptions.
++
++(define_int_iterator LSX_SCALAR_FRINT
++  [UNSPEC_SIMD_FRINTRP
++   UNSPEC_SIMD_FRINTRZ
++   UNSPEC_SIMD_FRINTRM
++   UNSPEC_SIMD_FRINTRNE])
++
++(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
++
++(define_expand "<simd_frint_pattern><mode>2"
++  [(set (match_dup 2)
++	(vec_duplicate:<VLSX_FOR_FMODE>
++	  (match_operand:ANYF 1 "register_operand")))
++   (set (match_dup 2)
++	(unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
++   (set (match_operand:ANYF 0 "register_operand")
++	(vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
++  "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
++
+ ;; <x>vftint.{/rp/rz/rm}
+ (define_insn
+  "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+new file mode 100644
+index 000000000..002e3b92d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
++
++#include "vect-frint-scalar.c"
++
++/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
++   call library function.  */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
++
++/* nearbyint has not been allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use the basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+new file mode 100644
+index 000000000..c7cb40be7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+@@ -0,0 +1,43 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx" } */
++
++#define test(func, suffix) \
++__typeof__ (1.##suffix) \
++_##func##suffix (__typeof__ (1.##suffix) x) \
++{ \
++  return __builtin_##func##suffix (x); \
++}
++
++test (ceil, f)
++test (ceil, )
++test (floor, f)
++test (floor, )
++test (trunc, f)
++test (trunc, )
++test (roundeven, f)
++test (roundeven, )
++test (nearbyint, f)
++test (nearbyint, )
++test (rint, f)
++test (rint, )
++
++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
++
++/* must do vreplvei first */
++/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
++/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
++
++/* nearbyint has not been allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use the basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+-- 
+2.43.0
+
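Putting the expander and its tests together, the expected code shape for a scalar ceil at -O2 -mlsx is roughly the following (a sketch: the function name is illustrative and the exact register allocation may differ):

double
my_ceil (double x)
{
  return __builtin_ceil (x);
  /* Anticipated assembly, matching the vect-frint-scalar.c scans:
       vreplvei.d  $vr0, $vr0, 0   # splat to avoid junk in high lanes
       vfrintrp.d  $vr0, $vr0      # round toward +inf
     The scalar result is then simply lane 0 of $vr0.  */
}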
View file
_service:tar_scm:0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
Added
@@ -0,0 +1,49 @@
+From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:56:35 +0800
+Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of
+ CLZ_DEFINED_VALUE_AT_ZERO.
+
+In the r14-5547 commit, CLZ_DEFINED_VALUE_AT_ZERO and
+CTZ_DEFINED_VALUE_AT_ZERO were defined at the same time, but in fact,
+CLZ_DEFINED_VALUE_AT_ZERO had already been defined, so remove the
+duplicate definition.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add
+	description.
+	(CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition.
+---
+ gcc/config/loongarch/loongarch.h | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 19cf6fd33..8b28be0e4 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3.  If not see
+ /* Define if loading short immediate values into registers sign extends.  */
+ #define SHORT_IMMEDIATES_SIGN_EXTEND 1
+ 
+-/* The clz.{w/d} instructions have the natural values at 0.  */
++/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0.  */
+ 
+ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+ 
+ /* Standard register usage.  */
+ 
+@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function
+ 
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+-
+-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-- 
+2.43.0
+
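A returned value of 2 from these macros tells the middle end that the zero-input result (the mode's bit width) holds for the RTL clz/ctz patterns as well, so a source-level zero guard can fold away. An illustrative sketch (the function name is not from the patch):

int
count_leading (unsigned int x)
{
  /* __builtin_clz (0) is undefined at the C level, but because clz.w
     returns 32 for a zero input and the macro says so, GCC can fold
     this whole expression into a single branch-free clz.w.  */
  return x ? __builtin_clz (x) : 32;
}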
View file
_service:tar_scm:0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
Added
@@ -0,0 +1,4375 @@ +From 8d5c983efc35804f98823e203eada6263dd1604e Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 28 Nov 2023 16:23:53 +0800 +Subject: PATCH 052/188 LoongArch: Added vectorized hardware inspection for + testsuite. + +When GCC regression tests are executed on a cpu that does not support +vectorization, the loongarch/vector directory will have some FAIL entries for +all test cases related to vectorization runs. In order to solve this kind +of problem, a vectorized hardware detection function was added to the code, +which can only be compiled but not run. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c:Remove + the default Settings to run the behavior. + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadda.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvand.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitset.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclo.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextrins.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffinth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftintl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvh.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvld.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvldi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsub.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmul.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvneg.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvorn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvprem.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseq.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseqi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsll.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsra.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrai.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsran.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrar.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrari.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrln.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssran.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c:Dito.
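The detection the description refers to boils down to a tiny probe whose compilation (but not execution) decides how the vector tests are treated, so hosts without LASX hardware no longer record spurious FAILs for run tests. A hypothetical sketch of such a probe; the real checks live in the DejaGnu support files and their exact form is an assumption here:

/* Hypothetical probe: it only needs to assemble, not run, for the
   toolchain side of the check; executing it would additionally prove
   the CPU supports LSX.  */
int
main (void)
{
  __asm__ volatile ("vadd.b $vr0,$vr0,$vr0");
  return 0;
}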
View file
_service:tar_scm:0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Added
@@ -0,0 +1,148 @@
+From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:38:37 +0800
+Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
+ signed/unsigned popcount.
+
+In LoongArch, the vector popcount has corresponding instructions, while
+the scalar one does not.  Currently, the scalar popcount is calculated
+through a loop, and a value that is not a power of two needs several
+iterations, so the vector popcount instruction is considered for
+optimization.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (v2di): Used to simplify the
+	following templates.
+	(popcount<mode>2): New.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/popcnt.c: New test.
+	* gcc.target/loongarch/popcount.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 27 +++++++++++-
+ gcc/testsuite/gcc.target/loongarch/popcnt.c   | 41 +++++++++++++++++++
+ gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
+ 3 files changed, 83 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 11577f407..cfd7a8ec6 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1512,7 +1512,30 @@
+   (set_attr "cnv_mode"	"D2S")
+   (set_attr "mode" "SF"))
+ 
+-
++;; In vector registers, popcount can be implemented directly through
++;; the vector instruction XVPCNT.  For GP registers, we can implement
++;; it through the following method.  Compared with a loop implementation
++;; of popcount, the following method has better performance.
++
++;; This attribute is used to connect a scalar mode with the corresponding
++;; vector mode.
++(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
++
++(define_expand "popcount<mode>2"
++  [(set (match_operand:GPR 0 "register_operand")
++	(popcount:GPR (match_operand:GPR 1 "register_operand")))]
++  "ISA_HAS_LSX"
++{
++  rtx in = operands[1];
++  rtx out = operands[0];
++  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
++				    gen_reg_rtx (V2DImode);
++  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
++  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
++  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
++  DONE;
++})
++
+ ;;
+ ;;  ....................
+ ;; +@@ -3879,7 +3902,7 @@ + (any_extend:SI (match_dup 3)))) + "") + +- ++ + + (define_mode_iterator QHSD QI HI SI DI) + +diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c +new file mode 100644 +index 000000000..a10fca420 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c +@@ -0,0 +1,41 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-not {popcount} } } */ ++/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */ ++ ++int ++foo (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++long ++foo1 (long x) ++{ ++ return __builtin_popcountl (x); ++} ++ ++long long ++foo2 (long long x) ++{ ++ return __builtin_popcountll (x); ++} ++ ++int ++foo3 (int *p) ++{ ++ return __builtin_popcount (*p); ++} ++ ++unsigned ++foo4 (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++unsigned long ++foo5 (int x) ++{ ++ return __builtin_popcount (x); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c +new file mode 100644 +index 000000000..390ff0676 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcount.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */ ++ ++int ++PopCount (long b) ++{ ++ int c = 0; ++ ++ while (b) ++ { ++ b &= b - 1; ++ c++; ++ } ++ ++ return c; ++} +-- +2.43.0 +
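For reference, the scalar-to-vector round trip the expander above emits looks like this (a sketch assuming the LP64 argument register $r4; lane numbers follow the GEN_INT operands in the expander):

long
popcnt64 (long x)
{
  return __builtin_popcountl (x);
  /* Expected with -O2 -mlsx (sketch):
       vinsgr2vr.d   $vr0, $r4, 0   # insert the scalar into lane 0
       vpcnt.d       $vr0, $vr0     # vector population count
       vpickve2gr.d  $r4, $vr0, 0   # extract the result
  */
}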
View file
_service:tar_scm:0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
Added
@@ -0,0 +1,163 @@
+From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:39:00 +0800
+Subject: [PATCH 054/188] LoongArch: Optimize vector constant
+ extract-{even/odd} permutation.
+
+For vector constant extract-{even/odd} permutations, replace the default
+xvshuf instruction combination with the xvilv{l/h} instructions, which
+reduces the instruction count and improves performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_is_odd_extraction):
+	Supplementary function prototype.
+	(loongarch_is_even_extraction): Adjust.
+	(loongarch_try_expand_lsx_vshuf_const): Adjust.
+	(loongarch_is_extraction_permutation): Adjust.
+	(loongarch_expand_vec_perm_const_2): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 33 +++++++++++-
+ .../loongarch/lasx-extract-even_odd-opt.c     | 54 +++++++++++++++++++
+ 2 files changed, 85 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index ecceca22d..3ef7e3605 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
+     }
+ }
+ 
++static bool
++loongarch_is_odd_extraction (struct expand_vec_perm_d *);
++
++static bool
++loongarch_is_even_extraction (struct expand_vec_perm_d *);
++
+ static bool
+ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+ {
+@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+   if (d->testing_p)
+     return true;
+ 
++  /* If the permutation matches the extract-even or extract-odd pattern,
++     vselect is much better than vshuf.  */
++  if (loongarch_is_odd_extraction (d)
++      || loongarch_is_even_extraction (d))
++    {
++      if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
++					    d->perm, d->nelt))
++	return true;
++
++      unsigned char perm2[MAX_VECT_LEN];
++      for (i = 0; i < d->nelt; ++i)
++	perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
++
++      if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
++					    perm2, d->nelt))
++	return true;
++    }
++
+   for (i = 0; i < d->nelt; i += 1)
+     {
+       rperm[i] = GEN_INT (d->perm[i]);
+@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 1;
++      buf += 2;
+     }
+ 
+   return result;
+@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 2;
++      buf += 1;
+     }
+ 
+   return result;
+@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+      Selector after: { 1, 3, 1, 3 }.
+      Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
+      Selector after: { 0, 2, 0, 2 }.  */
++
++  /* Better implementation of extract-even and extract-odd permutations.  */
++  if (loongarch_expand_vec_perm_even_odd (d))
++    return true;
++
+   for (i = 0; i < d->nelt / 2; i += 1)
+     {
+       idx = d->perm[i];
+diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+new file mode 100644
+index 000000000..515f0c862
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvilvl.d" } } */
++/* { dg-final { scan-assembler "xvilvh.d" } } */
++
++#define CMUL(a, b, c) \
++  { \
++    (c).ai = (a).ai * (b).ai - (a).bi * (b).bi; \
++    (c).bi = (a).ai * (b).bi + (a).bi * (b).ai; \
++    (c).ci = (a).ci * (b).ci - (a).di * (b).di; \
++    (c).di = (a).ci * (b).di + (a).di * (b).ci; \
++  }
++#define CSUM(a, b) \
++  { \
++    (a).ai += (b).ai; \
++    (a).bi += (b).bi; \
++    (a).ci += (b).ci; \
++    (a).di += (b).di; \
++  }
++
++typedef struct
++{
++  double ai;
++  double bi;
++  double ci;
++  double di;
++} complex;
++
++typedef struct
++{
++  complex e[6][6];
++} matrix;
++
++typedef struct
++{
++  complex c[6];
++} vector;
++
++void
++mult_adj_mat_vec (matrix *a, vector *b, vector *c)
++{
++  register int i, j;
++  register complex x, y;
++  for (i = 0; i < 6; i++)
++    {
++      x.ai = x.bi = x.ci = x.di = 0.0;
++      for (j = 0; j < 6; j++)
++	{
++	  CMUL (a->e[j][i], b->c[j], y);
++	  CSUM (x, y);
++	}
++      c->c[i] = x;
++    }
++}
+-- 
+2.43.0
+
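The permutation shape this targets can also be written directly with GNU vector shuffles. A sketch (the type name is illustrative, and the exact instruction selection depends on the mode) of an even extraction that this patch steers away from the generic xvshuf sequence:

typedef double v4f64 __attribute__ ((vector_size (32)));

/* Even extraction: even lanes of a followed by even lanes of b,
   i.e. the constant selector { 0, 2, 4, 6 }.  */
v4f64
extract_even (v4f64 a, v4f64 b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}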
View file
_service:tar_scm:0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
Added
@@ -0,0 +1,1697 @@ +From 548322a75cdeb96960fb9d324a2abf8735c4d254 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 7 Nov 2023 11:53:39 +0800 +Subject: PATCH 055/188 LoongArch: Add intrinsic function descriptions for + LSX and LASX instructions to doc. + +gcc/ChangeLog: + + * doc/extend.texi: Add information about the intrinsic function of the vector + instruction. +--- + gcc/doc/extend.texi | 1662 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1662 insertions(+) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 497c6de5f..7edd3974d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -14679,6 +14679,8 @@ instructions, but allow the compiler to schedule those calls. + * BPF Built-in Functions:: + * FR-V Built-in Functions:: + * LoongArch Base Built-in Functions:: ++* LoongArch SX Vector Intrinsics:: ++* LoongArch ASX Vector Intrinsics:: + * MIPS DSP Built-in Functions:: + * MIPS Paired-Single Support:: + * MIPS Loongson Built-in Functions:: +@@ -16262,6 +16264,1666 @@ Returns the value that is currently set in the @samp{tp} register. + void * __builtin_thread_pointer (void) + @end smallexample + ++@node LoongArch SX Vector Intrinsics ++@subsection LoongArch SX Vector Intrinsics ++ ++GCC provides intrinsics to access the LSX (Loongson SIMD Extension) instructions. ++The interface is made available by including @code{<lsxintrin.h>} and using ++@option{-mlsx}. ++ ++The following vectors typedefs are included in @code{lsxintrin.h}: ++ ++@itemize ++@item @code{__m128i}, a 128-bit vector of fixed point; ++@item @code{__m128}, a 128-bit vector of single precision floating point; ++@item @code{__m128d}, a 128-bit vector of double precision floating point. ++@end itemize ++ ++Instructions and corresponding built-ins may have additional restrictions and/or ++input/output values manipulated: ++@itemize ++@item @code{imm0_1}, an integer literal in range 0 to 1; ++@item @code{imm0_3}, an integer literal in range 0 to 3; ++@item @code{imm0_7}, an integer literal in range 0 to 7; ++@item @code{imm0_15}, an integer literal in range 0 to 15; ++@item @code{imm0_31}, an integer literal in range 0 to 31; ++@item @code{imm0_63}, an integer literal in range 0 to 63; ++@item @code{imm0_127}, an integer literal in range 0 to 127; ++@item @code{imm0_255}, an integer literal in range 0 to 255; ++@item @code{imm_n16_15}, an integer literal in range -16 to 15; ++@item @code{imm_n128_127}, an integer literal in range -128 to 127; ++@item @code{imm_n256_255}, an integer literal in range -256 to 255; ++@item @code{imm_n512_511}, an integer literal in range -512 to 511; ++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023; ++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047. ++@end itemize ++ ++For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and ++@code{__lsx_bnz_@{v/b/h/w/d@}}, which are implemented as follows: ++ ++@smallexample ++a. @code{__lsx_vrepli_@{b/h/w/d@}}: Implemented the case where the highest ++ bit of @code{vldi} instruction @code{i13} is 1. ++ ++ i1312 == 1'b0 ++ case i1311:10 of : ++ 2'b00: __lsx_vrepli_b (imm_n512_511) ++ 2'b01: __lsx_vrepli_h (imm_n512_511) ++ 2'b10: __lsx_vrepli_w (imm_n512_511) ++ 2'b11: __lsx_vrepli_d (imm_n512_511) ++ ++b. @code{__lsx_bnz_@{v/b/h/w/d@}}: Since the @code{vseteqz} class directive ++ cannot be used on its own, this function is defined. 
++ ++ _lsx_bz_v => vseteqz.v + bcnez ++ _lsx_bnz_v => vsetnez.v + bcnez ++ _lsx_bz_b => vsetanyeqz.b + bcnez ++ _lsx_bz_h => vsetanyeqz.h + bcnez ++ _lsx_bz_w => vsetanyeqz.w + bcnez ++ _lsx_bz_d => vsetanyeqz.d + bcnez ++ _lsx_bnz_b => vsetallnez.b + bcnez ++ _lsx_bnz_h => vsetallnez.h + bcnez ++ _lsx_bnz_w => vsetallnez.w + bcnez ++ _lsx_bnz_d => vsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lsx_bz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For directives where the intent operand is also the source operand ++(modifying only part of the bitfield of the intent register), the first parameter ++in the builtin call function is used as the intent operand. ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{dst}; ++ extern int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lsx_vinsgr2vr_b (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++The intrinsics provided are listed below: ++@smallexample ++int __lsx_bnz_b (__m128i); ++int __lsx_bnz_d (__m128i); ++int __lsx_bnz_h (__m128i); ++int __lsx_bnz_v (__m128i); ++int __lsx_bnz_w (__m128i); ++int __lsx_bz_b (__m128i); ++int __lsx_bz_d (__m128i); ++int __lsx_bz_h (__m128i); ++int __lsx_bz_v (__m128i); ++int __lsx_bz_w (__m128i); ++__m128i __lsx_vabsd_b (__m128i, __m128i); ++__m128i __lsx_vabsd_bu (__m128i, __m128i); ++__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_du (__m128i, __m128i); ++__m128i __lsx_vabsd_h (__m128i, __m128i); ++__m128i __lsx_vabsd_hu (__m128i, __m128i); ++__m128i __lsx_vabsd_w (__m128i, __m128i); ++__m128i __lsx_vabsd_wu (__m128i, __m128i); ++__m128i __lsx_vadda_b (__m128i, __m128i); ++__m128i __lsx_vadda_d (__m128i, __m128i); ++__m128i __lsx_vadda_h (__m128i, __m128i); ++__m128i __lsx_vadda_w (__m128i, __m128i); ++__m128i __lsx_vadd_b (__m128i, __m128i); ++__m128i __lsx_vadd_d (__m128i, __m128i); ++__m128i __lsx_vadd_h (__m128i, __m128i); ++__m128i __lsx_vaddi_bu (__m128i, imm0_31); ++__m128i __lsx_vaddi_du (__m128i, imm0_31); ++__m128i __lsx_vaddi_hu (__m128i, imm0_31); ++__m128i __lsx_vaddi_wu (__m128i, imm0_31); ++__m128i __lsx_vadd_q (__m128i, __m128i); ++__m128i __lsx_vadd_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vandi_b 
(__m128i, imm0_255); ++__m128i __lsx_vandn_v (__m128i, __m128i); ++__m128i __lsx_vand_v (__m128i, __m128i); ++__m128i __lsx_vavg_b (__m128i, __m128i); ++__m128i __lsx_vavg_bu (__m128i, __m128i); ++__m128i __lsx_vavg_d (__m128i, __m128i); ++__m128i __lsx_vavg_du (__m128i, __m128i); ++__m128i __lsx_vavg_h (__m128i, __m128i); ++__m128i __lsx_vavg_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_b (__m128i, __m128i); ++__m128i __lsx_vavgr_bu (__m128i, __m128i); ++__m128i __lsx_vavgr_d (__m128i, __m128i); ++__m128i __lsx_vavgr_du (__m128i, __m128i); ++__m128i __lsx_vavgr_h (__m128i, __m128i);
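The prototype inventory above (truncated here) is what the new documentation enumerates. A minimal use of the interface, compiled with -mlsx and using only names documented above, looks like this sketch:

#include <lsxintrin.h>

/* Lane-wise addition of four 32-bit integers.  */
__m128i
add4 (__m128i a, __m128i b)
{
  return __lsx_vadd_w (a, b);
}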
View file
_service:tar_scm:0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
Added
@@ -0,0 +1,925 @@ +From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 1 Dec 2023 10:09:33 +0800 +Subject: PATCH 056/188 LoongArch: Switch loongarch-def from C to C++ to make + it possible. + +We'll use HOST_WIDE_INT in LoongArch static properties in following patches. + +To keep the same readability as C99 designated initializers, create a +std::array like data structure with position setter function, and add +field setter functions for structs used in loongarch-def.cc. + +Remove unneeded guards #if +!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +in loongarch-def.h and loongarch-opts.h. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Remove extern "C". + (loongarch_isa_base_strings): Declare as loongarch_def_array + instead of plain array. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (loongarch_cpu_strings): Likewise. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_isa): Add a constructor and field setter functions. + * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not + include for target libraries. + * config/loongarch/loongarch-opts.cc: Comment code that doesn't + run and causes compilation errors. + * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. + (struct loongarch_rtx_cost_data): Likewise. + (struct loongarch_cache): Likewise. + (struct loongarch_align): Likewise. + * config/loongarch/t-loongarch: Compile loongarch-def.cc with the + C++ compiler. + * config/loongarch/loongarch-def-array.h: New file for a + std:array like data structure with position setter function. + * config/loongarch/loongarch-def.c: Rename to ... + * config/loongarch/loongarch-def.cc: ... here. + (loongarch_cpu_strings): Define as loongarch_def_array instead + of plain array. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_isa_base_strings): Likewise. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (abi_minimal_isa): Likewise. + (loongarch_rtx_cost_optimize_size): Use field setter functions + instead of designated initializers. + (loongarch_rtx_cost_data): Implement default constructor. 
+---
+ gcc/config/loongarch/loongarch-def-array.h |  40 ++++
+ gcc/config/loongarch/loongarch-def.c       | 227 ---------------------
+ gcc/config/loongarch/loongarch-def.cc      | 187 +++++++++++++++++
+ gcc/config/loongarch/loongarch-def.h       |  55 ++---
+ gcc/config/loongarch/loongarch-opts.cc     |   7 +
+ gcc/config/loongarch/loongarch-opts.h      |   5 +-
+ gcc/config/loongarch/loongarch-tune.h      | 123 ++++++++++-
+ gcc/config/loongarch/t-loongarch           |   4 +-
+ 8 files changed, 390 insertions(+), 258 deletions(-)
+ create mode 100644 gcc/config/loongarch/loongarch-def-array.h
+ delete mode 100644 gcc/config/loongarch/loongarch-def.c
+ create mode 100644 gcc/config/loongarch/loongarch-def.cc
+
+diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h
+new file mode 100644
+index 000000000..bdb3e9c6a
+--- /dev/null
++++ b/gcc/config/loongarch/loongarch-def-array.h
+@@ -0,0 +1,40 @@
++/* A std::array like data structure for LoongArch static properties.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++#ifndef _LOONGARCH_DEF_ARRAY_H
++#define _LOONGARCH_DEF_ARRAY_H 1
++
++template <class T, int N>
++class loongarch_def_array {
++private:
++  T arr[N];
++public:
++  loongarch_def_array () : arr{} {}
++
++  T &operator[] (int n) { return arr[n]; }
++  const T &operator[] (int n) const { return arr[n]; }
++
++  loongarch_def_array set (int idx, T &&value)
++  {
++    (*this)[idx] = value;
++    return *this;
++  }
++};
++
++#endif
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+deleted file mode 100644
+index fe4474e77..000000000
+--- a/gcc/config/loongarch/loongarch-def.c
++++ /dev/null
+@@ -1,227 +0,0 @@
+-/* LoongArch static properties.
+-   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+-   Contributed by Loongson Ltd.
+-
+-This file is part of GCC.
+-
+-GCC is free software; you can redistribute it and/or modify
+-it under the terms of the GNU General Public License as published by
+-the Free Software Foundation; either version 3, or (at your option)
+-any later version.
+-
+-GCC is distributed in the hope that it will be useful,
+-but WITHOUT ANY WARRANTY; without even the implied warranty of
+-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-GNU General Public License for more details.
+-
+-You should have received a copy of the GNU General Public License
+-along with GCC; see the file COPYING3.  If not see
+-<http://www.gnu.org/licenses/>.  */
+-
+-#include "loongarch-def.h"
+-#include "loongarch-str.h"
+-
+-/* CPU property tables.  */
+-const char*
+-loongarch_cpu_strings[N_TUNE_TYPES] = {
+-  [CPU_NATIVE] = STR_CPU_NATIVE,
+-  [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
+-  [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
+-  [CPU_LA464] = STR_CPU_LA464,
+-  [CPU_LA664] = STR_CPU_LA664,
+-};
+-
+-struct loongarch_isa
+-loongarch_cpu_default_isa[N_ARCH_TYPES] = {
+-  [CPU_LOONGARCH64] = {
+-    .base = ISA_BASE_LA64V100,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = 0,
+-  },
+-  [CPU_LA464] = {
+-    .base = ISA_BASE_LA64V100,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = ISA_EXT_SIMD_LASX,
+-  },
+-  [CPU_LA664] = {
+-    .base = ISA_BASE_LA64V110,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = ISA_EXT_SIMD_LASX,
+-  },
+-};
+-
+-struct loongarch_cache
+-loongarch_cpu_cache[N_TUNE_TYPES] = {
+-  [CPU_LOONGARCH64] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
+-    .l2d_size = 256,
+-    .simultaneous_prefetches = 4,
+-  },
+-  [CPU_LA464] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
+-    .l2d_size = 256,
+-    .simultaneous_prefetches = 4,
+-  },
+-  [CPU_LA664] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
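To see how the chained setters substitute for C99 designated initializers once the file is compiled as C++, here is a self-contained sketch (illustrative; the enum, names and values are hypothetical, only the class shape follows loongarch-def-array.h):

template <class T, int N>
class def_array
{
private:
  T arr[N];
public:
  def_array () : arr{} {}
  T &operator[] (int n) { return arr[n]; }
  def_array set (int idx, T &&value)
  {
    (*this)[idx] = value;  /* write one slot...                   */
    return *this;          /* ...and return a copy so calls chain */
  }
};

enum { K_A = 0, K_B = 1, N_KEYS = 2 };

/* C99: const char *strs[N_KEYS] = { [K_A] = "a", [K_B] = "b" }; */
static def_array<const char *, N_KEYS> strs
  = def_array<const char *, N_KEYS> ().set (K_A, "a").set (K_B, "b");

Returning the updated array by value is what keeps each table a single initializing expression, which is why the setters can read almost as positionally as the designated initializers they replace.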
_service:tar_scm:0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
Added
@@ -0,0 +1,261 @@ +From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 1 Dec 2023 11:51:51 +0800 +Subject: PATCH 057/188 LoongArch: Remove the definition of ISA_BASE_LA64V110 + from the code. + +The instructions defined in LoongArch Reference Manual v1.1 are not the instruction +set v1.1 version. The CPU defined later may only support some instructions in +LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and +related definitions are removed here. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. + (fill_native_cpu_config): Define a new variable hw_isa_evolution record the + extended instruction set support read from cpucfg. + * config/loongarch/loongarch-def.cc: Set evolution at initialization. + * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. + (ISA_BASE_LA64V110): Likewise. + (N_ISA_BASE_TYPES): Likewise. + (defined): Likewise. + * config/loongarch/loongarch-opts.cc: Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. + (ISA_BASE_IS_LA64V110): Likewise. + * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. + * config/loongarch/loongarch.opt: Regenerate. +--- + .../loongarch/genopts/loongarch-strings | 1 - + gcc/config/loongarch/genopts/loongarch.opt.in | 3 --- + gcc/config/loongarch/loongarch-cpu.cc | 23 +++++-------------- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++---- + gcc/config/loongarch/loongarch-def.h | 12 ++-------- + gcc/config/loongarch/loongarch-opts.cc | 3 --- + gcc/config/loongarch/loongarch-opts.h | 4 +--- + gcc/config/loongarch/loongarch-str.h | 1 - + gcc/config/loongarch/loongarch.opt | 3 --- + 9 files changed, 19 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 6c8a42af2..411ad5696 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,7 +30,6 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +-STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a49de07c9..cd5e75e4f 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,9 +32,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index bbce82c9c..7e0625835 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see + #include "config.h" + #include "system.h" + #include "coretypes.h" +-#include "tm.h" + #include "diagnostic-core.h" + + #include "loongarch-def.h" +@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3. 
If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + +-/* loongarch_isa_base_features defined here instead of loongarch-def.c +- because we need to use options.h. Pay attention on the order of elements +- in the initializer becaue ISO C++ does not allow C99 designated +- initializers! */ +- +-#define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) +- +-int64_t loongarch_isa_base_featuresN_ISA_BASE_TYPES = { +- /* ISA_BASE_LA64V100 = */ 0, +- /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES, +-}; + + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cacheN_CPUCFG_WORDS = { 0 }; +@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Use the native value anyways. */ + preset.simd = tmp; + ++ ++ int64_t hw_isa_evolution = 0; ++ + /* Features added during ISA evolution. */ + for (const auto &entry: cpucfg_map) + if (cpucfg_cacheentry.cpucfg_word & entry.cpucfg_bit) +- preset.evolution |= entry.isa_evolution_bit; ++ hw_isa_evolution |= entry.isa_evolution_bit; + + if (native_cpu_type != CPU_NATIVE) + { + /* Check if the local CPU really supports the features of the base + ISA of probed native_cpu_type. If any feature is not detected, + either GCC or the hardware is buggy. */ +- auto base_isa_feature = loongarch_isa_base_featurespreset.base; +- if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) + warning (0, + "detected base architecture %qs, but some of its " + "features are not detected; the detected base " +@@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + "features will be enabled", + loongarch_isa_base_stringspreset.base); + } ++ preset.evolution = hw_isa_evolution; + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6990c86c2..bc6997e45 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ + #include "loongarch-def.h" + #include "loongarch-str.h" + +@@ -51,9 +56,11 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ (ISA_BASE_LA64V110) ++ .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64) +- .simd_ (ISA_EXT_SIMD_LASX)); ++ .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + + static inline loongarch_cache la464_cache () + { +@@ -136,8 +143,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) +- .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 5ac70dfdd..f8f36f0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3. If not see + /* enum isa_base */ + + /* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-/* LoongArch V1.10. */ +-#define ISA_BASE_LA64V110 1 +-#define N_ISA_BASE_TYPES 2 ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +-/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is +- we cannot use the C++ header options.h in loongarch-def.c. */ +-extern int64_t loongarch_isa_base_features; +-#endif +- + /* enum isa_ext_* */ + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 45fc521e4..d31becc67 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc
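A compressed sketch of the evolution-bit accumulation this patch introduces (illustrative; the map entries and feature values below are made up, while the real table lives in the generated loongarch-cpucfg-map.h):

#include <cstdint>
#include <cstdio>

struct entry { int cpucfg_word; uint32_t cpucfg_bit; int64_t isa_evolution_bit; };

int
main ()
{
  /* Pretend cpucfg word 2 reports bits 26 and 27 set.  */
  uint32_t cpucfg_cache[3] = { 0, 0, (1u << 26) | (1u << 27) };
  const entry cpucfg_map[] = {
    { 2, 1u << 26, 1 << 0 },   /* hypothetical: div32  */
    { 2, 1u << 27, 1 << 1 },   /* hypothetical: lam-bh */
  };

  int64_t hw_isa_evolution = 0;
  for (const auto &e : cpucfg_map)
    if (cpucfg_cache[e.cpucfg_word] & e.cpucfg_bit)
      hw_isa_evolution |= e.isa_evolution_bit;

  printf ("evolution mask: 0x%llx\n", (unsigned long long) hw_isa_evolution);
  return 0;
}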
_service:tar_scm:0058-LoongArch-Add-support-for-xorsign.patch
Added
@@ -0,0 +1,412 @@ +From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 17 Nov 2023 17:00:21 +0800 +Subject: PATCH 058/188 LoongArch: Add support for xorsign. + +This patch adds support for xorsign pattern to scalar fp and vector. With the +new expands, uniformly using vector bitwise logical operations to handle xorsign. + +On LoongArch64, floating-point registers and vector registers share the same register, +so this patch also allows conversion between LSX vector mode and scalar fp mode to +avoid unnecessary instruction generation. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xorsign<mode>3): New expander. + * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow + conversion between LSX vector mode and scalar fp mode. + * config/loongarch/loongarch.md (@xorsign<mode>3): New expander. + * config/loongarch/lsx.md (@xorsign<mode>3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. + * gcc.target/loongarch/xorsign-run.c: New test. + * gcc.target/loongarch/xorsign.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 +++++-- + gcc/config/loongarch/loongarch.cc | 5 ++ + gcc/config/loongarch/loongarch.md | 17 ++++++ + gcc/config/loongarch/lsx.md | 23 +++++-- + .../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++ + .../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++ + .../gcc.target/loongarch/xorsign-run.c | 25 ++++++++ + gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++ + 10 files changed, 260 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 116b30c07..de7c88f14 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1065,10 +1065,10 @@ + (set_attr "mode" "<MODE>")) + + (define_insn "xor<mode>3" +- (set (match_operand:ILASX 0 "register_operand" "=f,f,f") +- (xor:ILASX +- (match_operand:ILASX 1 "register_operand" "f,f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) ++ (set (match_operand:LASX 0 "register_operand" "=f,f,f") ++ (xor:LASX ++ (match_operand:LASX 1 "register_operand" "f,f,f") ++ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) + "ISA_HAS_LASX" + "@ + xvxor.v\t%u0,%u1,%u2 +@@ -3061,6 +3061,20 @@ + operands5 = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "xorsign<mode>3" ++ (set (match_dup 4) ++ (and:FLASX (match_dup 3) ++ (match_operand:FLASX 2 "register_operand"))) ++ (set (match_operand:FLASX 0 "register_operand") ++ (xor:FLASX (match_dup 4) ++ (match_operand:FLASX 1 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ operands3 = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands4 = 
gen_reg_rtx (<MODE>mode); ++}) ++ + + (define_insn "absv4df2" + (set (match_operand:V4DF 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3ef7e3605..3c8ae9a42 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + ++ /* Allow conversion between LSX vector mode and scalar fp mode. */ ++ if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to)) ++ || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)))) ++ return true; ++ + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index cfd7a8ec6..afc3c591f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1164,6 +1164,23 @@ + "fcopysign.<fmt>\t%0,%1,%2" + (set_attr "type" "fcopysign") + (set_attr "mode" "<UNITMODE>")) ++ ++(define_expand "@xorsign<mode>3" ++ (match_operand:ANYF 0 "register_operand") ++ (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand") ++ "ISA_HAS_LSX" ++{ ++ machine_mode lsx_mode ++ = <MODE>mode == SFmode ? V4SFmode : V2DFmode; ++ rtx tmp = gen_reg_rtx (lsx_mode); ++ rtx op1 = lowpart_subreg (lsx_mode, operands1, <MODE>mode); ++ rtx op2 = lowpart_subreg (lsx_mode, operands2, <MODE>mode); ++ emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); ++ emit_move_insn (operands0, ++ lowpart_subreg (<MODE>mode, tmp, lsx_mode)); ++ DONE; ++}) +  + ;; + ;; .................... +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 232399934..ce6ec6d69 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -957,10 +957,10 @@ + (set_attr "mode" "<MODE>")) + + (define_insn "xor<mode>3" +- (set (match_operand:ILSX 0 "register_operand" "=f,f,f") +- (xor:ILSX +- (match_operand:ILSX 1 "register_operand" "f,f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) ++ (set (match_operand:LSX 0 "register_operand" "=f,f,f") ++ (xor:LSX ++ (match_operand:LSX 1 "register_operand" "f,f,f") ++ (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) + "ISA_HAS_LSX" + "@ + vxor.v\t%w0,%w1,%w2 +@@ -2786,6 +2786,21 @@ + operands5 = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "@xorsign<mode>3" ++ (set (match_dup 4) ++ (and:FLSX (match_dup 3) ++ (match_operand:FLSX 2 "register_operand"))) ++ (set (match_operand:FLSX 0 "register_operand") ++ (xor:FLSX (match_dup 4) ++ (match_operand:FLSX 1 "register_operand"))) ++ "ISA_HAS_LSX" ++{ ++ operands3 = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands4 = gen_reg_rtx (<MODE>mode); ++}) ++ ++ + (define_insn "absv2df2" + (set (match_operand:V2DF 0 "register_operand" "=f") + (abs:V2DF (match_operand:V2DF 1 "register_operand" "f"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +new file mode 100644 +index 000000000..2295503d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ ++/* { dg-require-effective-target loongarch_asx_hw } */ ++ ++#include "lasx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float aN = {-0.1f, -3.2f, -6.3f, 
-9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float bN = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float rN; ++ ++double adN = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d,
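For reference, the scalar identity the new xorsign expanders implement, as a hedged sketch (the patch itself operates on vector registers, with the sign-bit mask built by loongarch_build_signbit_mask):

#include <cstdint>
#include <cstring>
#include <cstdio>

static float
xorsign_ref (float x, float y)
{
  uint32_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux ^= uy & 0x80000000u;      /* flip x's sign by y's sign bit */
  memcpy (&x, &ux, sizeof x);
  return x;
}

int
main ()
{
  /* prints -2.500000 2.500000 */
  printf ("%f %f\n", xorsign_ref (2.5f, -1.0f), xorsign_ref (-2.5f, -1.0f));
  return 0;
}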
_service:tar_scm:0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Added
@@ -0,0 +1,730 @@ +From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:49 +0800 +Subject: PATCH 059/188 LoongArch: Add support for LoongArch V1.1 approximate + instructions. + +This patch adds define_insn/builtins/intrinsics for these instructions, and add option +-mfrecipe to control instruction generation. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in (fecipe): Add. + * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + * config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern. + (lasx_xvfrsqrte_<flasxfmt>): Ditto. + * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. + (__lasx_xvfrecipe_d): Ditto. + (__lasx_xvfrsqrte_s): Ditto. + (__lasx_xvfrsqrte_d): Ditto. + * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. + (LSX_EXT_BUILTIN): New macro. + (LASX_EXT_BUILTIN): Ditto. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". + * config/loongarch/loongarch-def.cc: Regenerate. + * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. + * config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern. + (loongarch_frsqrte_<fmt>): Ditto. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern. + (lsx_vfrsqrte_<flsxfmt>): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. + (__lsx_vfrecipe_d): Ditto. + (__lsx_vfrsqrte_s): Ditto. + (__lsx_vfrsqrte_d): Ditto. + * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. 
+--- + gcc/config/loongarch/genopts/isa-evolution.in | 1 + + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ + gcc/config/loongarch/lasx.md | 24 +++++++++++ + gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ + gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ + gcc/config/loongarch/loongarch-c.cc | 3 ++ + gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + + gcc/config/loongarch/loongarch-def.cc | 3 +- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++ + gcc/config/loongarch/lsx.md | 24 +++++++++++ + gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ + gcc/doc/extend.texi | 35 ++++++++++++++++ + .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ + .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ + .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ + 18 files changed, 365 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index a6bc3f87f..11a198b64 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,3 +1,4 @@ ++2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. + 2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. + 2 28 lamcas Support amcas_db.{b/h/w/d} instructions. +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 2833f1487..22035e767 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + } + #endif + ++#ifdef __loongarch_frecipe ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_s (float _1) ++{ ++ __builtin_loongarch_frecipe_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_d (double _1) ++{ ++ __builtin_loongarch_frecipe_d ((double) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_s (float _1) ++{ ++ __builtin_loongarch_frsqrte_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_d (double _1) ++{ ++ __builtin_loongarch_frsqrte_d ((double) _1); ++} ++#endif ++ + /* Assembly instruction format: ui15. */ + /* Data types in instruction templates: USI. 
*/ + #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index de7c88f14..b1416f6c3 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -40,8 +40,10 @@ + UNSPEC_LASX_XVFCVTL + UNSPEC_LASX_XVFLOGB + UNSPEC_LASX_XVFRECIP ++ UNSPEC_LASX_XVFRECIPE + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT ++ UNSPEC_LASX_XVFRSQRTE + UNSPEC_LASX_XVFCMP_SAF + UNSPEC_LASX_XVFCMP_SEQ + UNSPEC_LASX_XVFCMP_SLE +@@ -1633,6 +1635,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lasx_xvfrecipe_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRECIPE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrecipe.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvfrsqrt_<flasxfmt>" + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +@@ -1642,6 +1655,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Square Root Instructions. ++ ++(define_insn "lasx_xvfrsqrte_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRTE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrsqrte.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>" + (set (match_operand:<VIMODE256> 0 "register_operand" "=f") + (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 7bce2c757..5e65e76e7 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) + return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrecipe_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
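A usage sketch of the new scalar estimate intrinsics, guarded by the feature macro this patch adds (assumption: a LoongArch toolchain with this patch applied, built with -mfrecipe; the void signatures follow the larchintrin.h hunk above):

#include <larchintrin.h>

void
prime_estimates (float s, double d)
{
#ifdef __loongarch_frecipe
  __frecipe_s (s);   /* frecipe.s: approximate 1/s       */
  __frsqrte_d (d);   /* frsqrte.d: approximate 1/sqrt(d) */
#endif
}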
_service:tar_scm:0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
Added
@@ -0,0 +1,257 @@ +From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:50 +0800 +Subject: PATCH 060/188 LoongArch: Use standard pattern name for + xvfrsqrt/vfrsqrt instructions. + +Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with standard +pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrsqrt_<flasxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. + (CODE_FOR_lsx_vfrsqrt_s): Ditto. + (CODE_FOR_lasx_xvfrsqrt_d): Ditto. + (CODE_FOR_lasx_xvfrsqrt_s): Ditto. + * config/loongarch/loongarch.cc (use_rsqrt_p): New function. + (loongarch_optab_supported_p): Ditto. + (TARGET_OPTAB_SUPPORTED_P): New hook. + * config/loongarch/loongarch.md (*rsqrt<mode>a): Remove. + (*rsqrt<mode>2): New insn pattern. + (*rsqrt<mode>b): Remove. + * config/loongarch/lsx.md (lsx_vfrsqrt_<flsxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. +--- + gcc/config/loongarch/lasx.md | 6 ++--- + gcc/config/loongarch/loongarch-builtins.cc | 4 +++ + gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++ + gcc/config/loongarch/loongarch.md | 24 +++++------------ + gcc/config/loongarch/lsx.md | 6 ++--- + .../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++ + .../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++ + 7 files changed, 96 insertions(+), 23 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b1416f6c3..3a4a1fe51 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1646,10 +1646,10 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrsqrt_<flasxfmt>" ++(define_insn "rsqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRSQRT)) ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) + "ISA_HAS_LASX" + "xvfrsqrt.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index bc156bd36..4aae27a5e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h + #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d ++#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 ++#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu + #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du ++#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 
++#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ce1c0a8bd..95aa9453b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++static bool ++use_rsqrt_p (void) ++{ ++ return (flag_finite_math_only ++ && !flag_trapping_math ++ && flag_unsafe_math_optimizations); ++} ++ ++/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ ++ ++static bool ++loongarch_optab_supported_p (int op, machine_mode, machine_mode, ++ optimization_type opt_type) ++{ ++ switch (op) ++ { ++ case rsqrt_optab: ++ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); ++ ++ default: ++ return true; ++ } ++} ++ + /* If -fverbose-asm, dump some info for debugging. */ + static void + loongarch_asm_code_end (void) +@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9080cec1c..4dfe583e2 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -60,6 +60,7 @@ + UNSPEC_TIE + + ;; RSQRT ++ UNSPEC_RSQRT + UNSPEC_RSQRTE + + ;; RECIP +@@ -1134,25 +1135,14 @@ + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")) + +-(define_insn "*rsqrt<mode>a" ++(define_insn "*rsqrt<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" +- (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) +- +-(define_insn "*rsqrt<mode>b" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_RSQRT)) ++ "TARGET_HARD_FLOAT" ++ "frsqrt.<fmt>\t%0,%1" + (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) ++ (set_attr "mode" "<UNITMODE>")) + + ;; Approximate Reciprocal Square Root Instructions. 
+ +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 37bdc6910..cb4a448e7 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1559,10 +1559,10 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vfrsqrt_<flsxfmt>" ++(define_insn "rsqrt<mode>2" + (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRSQRT)) ++ (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") ++ UNSPEC_LSX_VFRSQRT)) + "ISA_HAS_LSX" + "vfrsqrt.<flsxfmt>\t%w0,%w1" + (set_attr "type" "simd_fdiv") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +new file mode 100644 +index 000000000..24316944d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffast-math" } */ ++/* { dg-final { scan-assembler "xvfrsqrt.s" } } */
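A sketch mirroring the new lasx-rsqrt.c test (illustrative): with -O2 -mlasx -ffast-math, use_rsqrt_p () holds and a loop like the one below should vectorize to xvfrsqrt.s instead of a square root followed by a division:

extern float a[1024], b[1024];

void
rsqrt_loop (void)
{
  /* 1/sqrt(x) is the shape the rsqrt<mode>2 patterns serve.  */
  for (int i = 0; i < 1024; i++)
    b[i] = 1.0f / __builtin_sqrtf (a[i]);
}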
_service:tar_scm:0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
Added
@@ -0,0 +1,135 @@ +From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:51 +0800 +Subject: PATCH 061/188 LoongArch: Redefine pattern for xvfrecip/vfrecip + instructions. + +Redefine pattern for xvfrecip instructions use rtx code instead of unspec, and enable +xvfrecip instructions to be generated during auto-vectorization. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrecip_<flasxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine + to new pattern name. + (CODE_FOR_lsx_vfrecip_s): Ditto. + (CODE_FOR_lasx_xvfrecip_d): Ditto. + (CODE_FOR_lasx_xvfrecip_s): Ditto. + (loongarch_expand_builtin_direct): For the vector recip instructions, construct a + temporary parameter const1_vector. + * config/loongarch/lsx.md (lsx_vfrecip_<flsxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/predicates.md (const_vector_1_operand): New predicate. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++ + gcc/config/loongarch/lsx.md | 8 ++++---- + gcc/config/loongarch/predicates.md | 4 ++++ + 4 files changed, 32 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3a4a1fe51..ad49a3ffb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1626,12 +1626,12 @@ + (set_attr "type" "simd_fminmax") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrecip_<flasxfmt>" ++(define_insn "recip<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRECIP)) ++ (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "") ++ (match_operand:FLASX 2 "register_operand" "f"))) + "ISA_HAS_LASX" +- "xvfrecip.<flasxfmt>\t%u0,%u1" ++ "xvfrecip.<flasxfmt>\t%u0,%u2" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 4aae27a5e..85849ed29 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d + #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 + #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 ++#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3 ++#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du + #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 + #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 ++#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3 ++#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, + if (has_target_p) + create_output_operand (&opsopno++, target, TYPE_MODE (TREE_TYPE (exp))); + ++ /* For the vector reciprocal instructions, we need to construct a temporary ++ parameter const1_vector. 
*/ ++ switch (icode) ++ { ++ case CODE_FOR_recipv8sf3: ++ case CODE_FOR_recipv4df3: ++ case CODE_FOR_recipv4sf3: ++ case CODE_FOR_recipv2df3: ++ loongarch_prepare_builtin_arg (&ops2, exp, 0); ++ create_input_operand (&ops1, CONST1_RTX (ops0.mode), ops0.mode); ++ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); ++ ++ default: ++ break; ++ } ++ + /* Map the arguments to the other operands. */ + gcc_assert (opno + call_expr_nargs (exp) + == insn_dataicode.n_generator_args); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index cb4a448e7..f2774f021 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1539,12 +1539,12 @@ + (set_attr "type" "simd_fminmax") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vfrecip_<flsxfmt>" ++(define_insn "recip<mode>3" + (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRECIP)) ++ (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "") ++ (match_operand:FLSX 2 "register_operand" "f"))) + "ISA_HAS_LSX" +- "vfrecip.<flsxfmt>\t%w0,%w1" ++ "vfrecip.<flsxfmt>\t%w0,%w2" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 30a0dee9f..572550dbc 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -227,6 +227,10 @@ + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (match_test "op == CONST1_RTX (GET_MODE (op))"))) + ++(define_predicate "const_vector_1_operand" ++ (and (match_code "const_vector") ++ (match_test "op == CONST1_RTX (GET_MODE (op))"))) ++ + (define_predicate "reg_or_1_operand" + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) +-- +2.43.0 +
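A sketch of a loop the redefined pattern can serve (illustrative): with -O2 -mlsx -ffast-math as an assumed option set, the (div 1 x) form below can be emitted as vfrecip.s rather than vfdiv.s:

extern float a[1024], r[1024];

void
recip_loop (void)
{
  for (int i = 0; i < 1024; i++)
    r[i] = 1.0f / a[i];   /* (div 1 x) form matched by recip<mode>3 */
}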
_service:tar_scm:0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
Added
@@ -0,0 +1,1096 @@ +From faac4efbee23e60691fc086a78284225ecf824a8 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:52 +0800 +Subject: PATCH 062/188 LoongArch: New options -mrecip and -mrecip= with + ffast-math. + +When both the -mrecip and -mfrecipe options are enabled, use approximate reciprocal +instructions and approximate reciprocal square root instructions with additional +Newton-Raphson steps to implement single precision floating-point division, square +root and reciprocal square root operations, for a better performance. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lasx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set + recip_mask for -mrecip and -mrecip= options. + (loongarch_emit_swrsqrtsf): New function. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT + RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT + RECIP_MASK_ALL): New bitmasks. + (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV + TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. + * config/loongarch/loongarch.md (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch.opt (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lsx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. + * doc/invoke.texi (LoongArch Options): Document new options. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/divf.c: New test. + * gcc.target/loongarch/recip-divf.c: New test. + * gcc.target/loongarch/recip-sqrtf.c: New test. + * gcc.target/loongarch/sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 11 + + gcc/config/loongarch/lasx.md | 53 ++++- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch.cc | 188 ++++++++++++++++++ + gcc/config/loongarch/loongarch.h | 18 ++ + gcc/config/loongarch/loongarch.md | 49 ++++- + gcc/config/loongarch/loongarch.opt | 11 + + gcc/config/loongarch/lsx.md | 53 ++++- + gcc/config/loongarch/predicates.md | 4 + + gcc/doc/invoke.texi | 55 ++++- + gcc/testsuite/gcc.target/loongarch/divf.c | 10 + + .../gcc.target/loongarch/recip-divf.c | 9 + + .../gcc.target/loongarch/recip-sqrtf.c | 23 +++ + gcc/testsuite/gcc.target/loongarch/sqrtf.c | 24 +++ + .../loongarch/vector/lasx/lasx-divf.c | 13 ++ + .../loongarch/vector/lasx/lasx-recip-divf.c | 12 ++ + .../loongarch/vector/lasx/lasx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lasx/lasx-recip.c | 24 +++ + .../loongarch/vector/lasx/lasx-sqrtf.c | 29 +++ + .../loongarch/vector/lsx/lsx-divf.c | 13 ++ + .../loongarch/vector/lsx/lsx-recip-divf.c | 12 ++ + .../loongarch/vector/lsx/lsx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lsx/lsx-recip.c | 24 +++ + .../loongarch/vector/lsx/lsx-sqrtf.c | 29 +++ + 24 files changed, 711 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index cd5e75e4f..102202b03 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -23,6 +23,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -194,6 +197,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ad49a3ffb..eeac8cd98 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1194,7 +1194,25 @@ + (set_attr "type" "simd_fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "div<mode>3" ++(define_expand "div<mode>3" ++ (set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLASX 2 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands0, operands1, ++ operands2, V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f"))) +@@ -1223,7 +1241,23 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "sqrt<mode>2" ++(define_expand "sqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f"))) + "ISA_HAS_LASX" +@@ -1646,7 +1680,20 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "rsqrt<mode>2" ++(define_expand "rsqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) ++ "ISA_HAS_LASX" ++ { ++ if (<MODE>mode == V8SFmode && TARGET_RECIP_VEC_RSQRT) ++ {
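For intuition, the Newton-Raphson refinement that -mrecip pairs with the estimate instructions, written out in scalar form (a sketch only; the compiler emits the vector equivalent, and the precision remark is an assumption about the hardware estimate):

/* e0 is a hardware estimate (frecipe.s / frsqrte.s); each Newton step
   roughly doubles the number of correct bits.  */
static inline float
refine_recip (float x, float e0)
{
  return e0 * (2.0f - x * e0);             /* Newton step for 1/x */
}

static inline float
refine_rsqrt (float x, float e0)
{
  return e0 * (1.5f - 0.5f * x * e0 * e0); /* Newton step for 1/sqrt(x) */
}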
_service:tar_scm:0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
Added
@@ -0,0 +1,83 @@ +From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:53 +0800 +Subject: PATCH 063/188 LoongArch: Vectorized loop unrolling is disable for + divf/sqrtf/rsqrtf when -mrecip is enabled. + +Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number +of generated instructions is close to or exceeds the maximum issue instructions per cycle of the +LoongArch, so vectorized loop unrolling is not performed on them. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): + If m_has_recip is true, uf return 1. + (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. +--- + gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++-- + 1 file changed, 34 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 18326ce47..d64777179 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3970,7 +3970,9 @@ protected: + /* Reduction factor for suggesting unroll factor. */ + unsigned m_reduc_factor = 0; + /* True if the loop contains an average operation. */ +- bool m_has_avg =false; ++ bool m_has_avg = false; ++ /* True if the loop uses approximation instruction sequence. */ ++ bool m_has_recip = false; + }; + + /* Implement TARGET_VECTORIZE_CREATE_COSTS. */ +@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + { + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + +- if (m_has_avg) ++ if (m_has_avg || m_has_recip) + return 1; + + /* Don't unroll if it's specified explicitly not to be unrolled. */ +@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + } + } + ++ combined_fn cfn; ++ if (kind == vector_stmt ++ && stmt_info ++ && stmt_info->stmt) ++ { ++ /* Detect the use of approximate instruction sequence. */ ++ if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT) ++ && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) ++ switch (cfn) ++ { ++ case CFN_BUILT_IN_SQRTF: ++ m_has_recip = true; ++ default: ++ break; ++ } ++ else if (TARGET_RECIP_VEC_DIV ++ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ machine_mode mode = TYPE_MODE (vectype); ++ switch (gimple_assign_rhs_code (stmt_info->stmt)) ++ { ++ case RDIV_EXPR: ++ if (GET_MODE_INNER (mode) == SFmode) ++ m_has_recip = true; ++ default: ++ break; ++ } ++ } ++ } ++ + return retval; + } + +-- +2.43.0 +
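The unrolling decision above reduces to a short predicate; a condensed stub (illustrative, not the GCC hook signature):

unsigned
suggested_uf (bool m_has_avg, bool m_has_recip, unsigned uf)
{
  /* The expanded estimate+refine sequence already fills the issue
     width, so unrolling such loops buys nothing.  */
  if (m_has_avg || m_has_recip)
    return 1;
  return uf;
}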
_service:tar_scm:0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Added
@@ -0,0 +1,130 @@ +From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 25 Oct 2024 02:45:35 +0000 +Subject: PATCH 064/188 LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests + fail on LA664 PR112611 + +For xvshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. + +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. +--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 14 +++++++------- + .../gcc.target/loongarch/vector/lsx/lsx-vshuf.c | 12 ++++++------ + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index b8ab38711..910d29339 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -99,9 +99,9 @@ main () + *((unsigned long *)&__m256i_op12) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op11) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op10) = 0x7ff0000000000000; +- *((unsigned long *)&__m256i_op23) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op23) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op22) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op21) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op21) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +@@ -200,7 +200,7 @@ main () + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +@@ -351,7 +351,7 @@ main () + *((unsigned long *)&__m256i_op21) = 0x0000000000000001; + *((unsigned long *)&__m256i_op20) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result3) = 0x0000000000000001; +- *((unsigned long *)&__m256i_result2) = 0x0000000200000000; ++ *((unsigned long *)&__m256i_result2) = 0x0000000000000000; + *((unsigned long *)&__m256i_result1) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); +@@ -426,10 +426,10 @@ main () + *((unsigned long *)&__m256i_op22) = 0x8000000080000000; + *((unsigned long *)&__m256i_op21) = 0xdfffffffdfffffff; + *((unsigned long *)&__m256i_op20) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result3) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result3) = 0xdfffffff80000000; + *((unsigned long *)&__m256i_result2) = 0x7fc00000dfffffff; +- *((unsigned long *)&__m256i_result1) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result0) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result1) = 
0x7fc0000000000000; ++ *((unsigned long *)&__m256i_result0) = 0x8000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index f3b800f88..93a3078fa 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -33,7 +33,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x0000000000000000; + *((unsigned long *)&__m128i_op20) = 0x3f2f1f0f00000000; + *((unsigned long *)&__m128i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result0) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result0) = 0x00ff00ff00000000; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -153,7 +153,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x000000002bfd9461; + *((unsigned long *)&__m128i_op21) = 0x00007fff00007fff; + *((unsigned long *)&__m128i_op20) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0x00007fff00000000; + *((unsigned long *)&__m128i_result0) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -198,7 +198,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x00000000000000c0; + *((unsigned long *)&__m128i_op20) = 0x00000001ffffff29; + *((unsigned long *)&__m128i_result1) = 0xffffff29ffffff29; +- *((unsigned long *)&__m128i_result0) = 0x0000000100000001; ++ *((unsigned long *)&__m128i_result0) = 0xffffff2900000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -219,7 +219,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0x0000000020000020; + *((unsigned long *)&__m128i_op20) = 0x0000000020000020; +- *((unsigned long *)&__m128i_result1) = 0x2000002000000000; ++ *((unsigned long *)&__m128i_result1) = 0x0000000000000000; + *((unsigned long *)&__m128i_result0) = 0x2000002020000020; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -241,7 +241,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000001000000010; + *((unsigned long *)&__m128i_op21) = 0x8000000100000000; + *((unsigned long *)&__m128i_op20) = 0x8000000000000103; +- *((unsigned long *)&__m128i_result1) = 0x0000010300000103; ++ *((unsigned long *)&__m128i_result1) = 0x8000000000000103; + *((unsigned long *)&__m128i_result0) = 0x0000010380000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -252,7 +252,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m128i_result0) = 0xffffffffffffffff; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +-- +2.43.0 +
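A small sketch of the portability rule behind the changed test constants (hypothetical helper, not from the patch): for xvshuf/vshuf, selector index values above 63 are undefined on LA464 even though LA664 happens to tolerate them, so portable selectors keep each index in range:

#include <cstdint>

static inline uint8_t
shuf_index (uint8_t idx)
{
  return idx & 63;   /* keep selector indices in the defined 0..63 range */
}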
_service:tar_scm:0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
Added
@@ -0,0 +1,318 @@ +From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 29 Nov 2023 11:18:00 +0800 +Subject: PATCH 065/188 LoongArch: Fix ICE and use simplify_gen_subreg + instead of gen_rtx_SUBREG directly. + +loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported +in gcc, it causes an ICE: + +ice.c:55:1: error: unrecognizable insn: + 55 | } + | ^ +(insn 63 62 64 8 (set (reg:V4DI 278) + (subreg:V4DI (subreg:V4DF (reg:V4DI 273 vect__53.26 ) 0) 0)) -1 + (nil)) +during RTL pass: vregs +ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804 + +Last time, Ruoyao has fixed a similar ICE: +https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html + +This patch fixes ICE and use simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible +to avoid the same ice happening again. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use + simplify_gen_subreg instead of gen_rtx_SUBREG. + (loongarch_expand_vec_perm_const_2): Ditto. + (loongarch_expand_vec_cond_expr): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr112476-3.c: New test. + * gcc.target/loongarch/pr112476-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 79 +++++++++++-------- + .../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++ + .../gcc.target/loongarch/pr112476-4.c | 4 + + 3 files changed, 108 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d64777179..4a3a7a246 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + if (d->vmode == E_V2DFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else if (d->vmode == E_V4SFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else +@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + /* Adjust op1 for selecting correct value in high 128bit of target + register. + op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x21))); + +@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + + /* Generate subreg for fitting into insn gen function. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + + /* Adjust op value in temp register. 
+ op0 = {0,1,2,3}, op1 = {4,5,0,1} */ +@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x02))); +@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + /* We can achieve the expectation by using sinple xvpermi.q insn. */ + emit_move_insn (conv_target, conv_op1); +@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + /* Adjust op value in temp regiter. 
+ op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x13))); +@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx temp_reg = gen_reg_rtx (d->vmode); +- rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, ++ d->vmode, 0); + + emit_move_insn (temp_reg, d->op0); + +@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); + + /* Duplicate op0's low 128bit in op0, then duplicate high 128bit + in op1. 
After this, xvshuf.* insn's selector argument can +@@ -9978,10 +9984,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + + /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure + that selector's low 128bit can access all op0's elements, and +@@ -10101,12 +10109,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + { + case E_V4DFmode: + sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break; + case E_V8SFmode: + sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break;
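The ICE arises from user code that views the same vector through both floating-point and integer modes, which made the expander wrap one subreg inside another. A hypothetical reduced reproducer in that spirit (the bundled pr112476-3.c may differ; compile with -mlasx):

    typedef double v4df __attribute__ ((vector_size (32)));
    typedef long long v4di __attribute__ ((vector_size (32)));

    /* Element-wise select between double vectors with an integer mask;
       expanding this kind of mode-punned condition produced the
       subreg-of-subreg RTL shown in the commit message.  */
    v4df
    select_mask (v4df a, v4df b, v4di m)
    {
      return (v4df) (((v4di) a & m) | ((v4di) b & ~m));
    }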
_service:tar_scm:0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
Added
@@ -0,0 +1,236 @@ +From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:01:18 +0800 +Subject: PATCH 066/188 LoongArch: Fix eh_return epilogue for normal returns. + +On LoongArch, the regitsters $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved +and restored in the function prologue and epilogue if the given function calls +__builtin_eh_return. This causes the return value to be overwritten on normal +return paths and breaks a rare case of libgcc's _Unwind_RaiseException. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: Do not restore the saved eh_return + data registers ($r4-$r7) for a normal return of a function that calls + __builtin_eh_return elsewhere. + * config/loongarch/loongarch-protos.h: Same. + * config/loongarch/loongarch.md: Same. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/eh_return-normal-return.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 34 ++++++++++++----- + gcc/config/loongarch/loongarch.md | 23 ++++++++++- + .../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++ + 4 files changed, 84 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 117669e9f..e5fcf3111 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -60,7 +60,7 @@ enum loongarch_symbol_type { + extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); +-extern void loongarch_expand_epilogue (bool); ++extern void loongarch_expand_epilogue (int); + extern bool loongarch_can_use_return_insn (void); +  + extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4a3a7a246..7caf04d8d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, + + static void + loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, +- loongarch_save_restore_fn fn) ++ loongarch_save_restore_fn fn, ++ bool skip_eh_data_regs_p) + { + HOST_WIDE_INT offset; + +@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +- if (!cfun->machine->reg_is_wrapped_separatelyregno) ++ /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO) ++ when returning normally from a function that calls ++ __builtin_eh_return. In this case, these registers are saved but ++ should not be restored, or the return value may be clobbered. 
*/ ++ ++ if (!(cfun->machine->reg_is_wrapped_separatelyregno ++ || (skip_eh_data_regs_p ++ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))) + loongarch_save_restore_reg (word_mode, regno, offset, fn); + + offset -= UNITS_PER_WORD; +@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void) + GEN_INT (-step1)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + size -= step1; +- loongarch_for_each_saved_reg (size, loongarch_save_reg); ++ loongarch_for_each_saved_reg (size, loongarch_save_reg, false); + } + + /* Set up the frame pointer, if we're using one. */ +@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void) + return reload_completed && cfun->machine->frame.total_size == 0; + } + +-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P +- says which. */ ++/* Expand function epilogue using the following insn patterns: ++ "epilogue" (style == NORMAL_RETURN) ++ "sibcall_epilogue" (style == SIBCALL_RETURN) ++ "eh_return" (style == EXCEPTION_RETURN) */ + + void +-loongarch_expand_epilogue (bool sibcall_p) ++loongarch_expand_epilogue (int style) + { + /* Split the frame into two. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we +@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p) + bool need_barrier_p + = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; + +- if (!sibcall_p && loongarch_can_use_return_insn ()) ++ /* Handle simple returns. */ ++ if (style == NORMAL_RETURN && loongarch_can_use_return_insn ()) + { + emit_jump_insn (gen_return ()); + return; +@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p) + + /* Restore the registers. */ + loongarch_for_each_saved_reg (frame->total_size - step2, +- loongarch_restore_reg); ++ loongarch_restore_reg, ++ crtl->calls_eh_return ++ && style != EXCEPTION_RETURN); + + if (need_barrier_p) + loongarch_emit_stack_tie (); +@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p) + } + + /* Add in the __builtin_eh_return stack adjustment. */ +- if (crtl->calls_eh_return) ++ if (crtl->calls_eh_return && style == EXCEPTION_RETURN) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + +- if (!sibcall_p) ++ /* Emit return unless doing sibcall. */ ++ if (style != SIBCALL_RETURN) + emit_jump_insn (gen_simple_return_internal (ra)); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c6edd1dda..222f1ae83 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -125,6 +125,11 @@ + (T1_REGNUM 13) + (S0_REGNUM 23) + ++ ;; Return path styles ++ (NORMAL_RETURN 0) ++ (SIBCALL_RETURN 1) ++ (EXCEPTION_RETURN 2) ++ + ;; PIC long branch sequences are never longer than 100 bytes. 
+ (MAX_PIC_BRANCH_LENGTH 100) + ) +@@ -3276,7 +3281,7 @@ + (const_int 2) + "" + { +- loongarch_expand_epilogue (false); ++ loongarch_expand_epilogue (NORMAL_RETURN); + DONE; + }) + +@@ -3284,7 +3289,7 @@ + (const_int 2) + "" + { +- loongarch_expand_epilogue (true); ++ loongarch_expand_epilogue (SIBCALL_RETURN); + DONE; + }) + +@@ -3341,6 +3346,20 @@ + emit_insn (gen_eh_set_ra_di (operands0)); + else + emit_insn (gen_eh_set_ra_si (operands0)); ++ ++ emit_jump_insn (gen_eh_return_internal ()); ++ emit_barrier (); ++ DONE; ++}) ++ ++(define_insn_and_split "eh_return_internal" ++ (eh_return) ++ "" ++ "#" ++ "epilogue_completed" ++ (const_int 0) ++{ ++ loongarch_expand_epilogue (EXCEPTION_RETURN); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +new file mode 100644 +index 000000000..f8f3965f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++#include <stdlib.h>
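The bundled test is cut off by the diff truncation, but the failure mode is easy to sketch: a function that calls __builtin_eh_return on one path must still deliver its return value in $r4 on the normal path, so the epilogue must not unconditionally restore the saved EH data registers. An illustrative shape (an assumption of mine, not the actual test body):

    extern void *handler;

    /* On the normal path the return value lives in $r4 ($a0); blindly
       restoring the saved eh_return data registers would clobber it.  */
    int __attribute__ ((noinline))
    maybe_unwind (int normal)
    {
      if (!normal)
        __builtin_eh_return (0L, handler);
      return 42;   /* must survive the epilogue */
    }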
_service:tar_scm:0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
Added
@@ -0,0 +1,180 @@ +From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 7 Dec 2023 15:45:30 +0800 +Subject: PATCH 067/188 LoongArch: Allow -mcmodel=extreme and model attribute + with -mexplicit-relocs=auto + +There seems no real reason to require -mexplicit-relocs=always for +-mcmodel=extreme or model attribute. As the linker does not know how to +relax a 3-operand la.local or la.global pseudo instruction, just emit +explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also +SYMBOL_GOT_DISP. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP + if TARGET_CMODEL_EXTREME. + (loongarch_split_symbol): Check for la_opt_explicit_relocs != + EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. + (loongarch_print_operand_reloc): Likewise. + (loongarch_option_override_internal): Likewise. + (loongarch_handle_model_attribute): Likewise. + * doc/invoke.texi (-mcmodel=extreme): Update the compatibility + between it and -mexplicit-relocs=. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-3.c: New test. + * gcc.target/loongarch/attr-model-4.c: New test. + * gcc.target/loongarch/func-call-extreme-3.c: New test. + * gcc.target/loongarch/func-call-extreme-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 25 ++++++++++++------- + gcc/doc/invoke.texi | 4 +-- + .../gcc.target/loongarch/attr-model-3.c | 6 +++++ + .../gcc.target/loongarch/attr-model-4.c | 6 +++++ + .../loongarch/func-call-extreme-3.c | 7 ++++++ + .../loongarch/func-call-extreme-4.c | 7 ++++++ + 6 files changed, 44 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7caf04d8d..4362149ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + case SYMBOL_TLS_LE: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- /* The linker don't know how to relax TLS accesses. */ ++ case SYMBOL_PCREL64: ++ /* The linker don't know how to relax TLS accesses or 64-bit ++ pc-relative accesses. */ + return true; + case SYMBOL_GOT_DISP: ++ /* The linker don't know how to relax GOT accesses in extreme ++ code model. 
*/ ++ if (TARGET_CMODEL_EXTREME) ++ return true; ++ + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + temp1 = gen_reg_rtx (Pmode); + emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + loongarch_classify_symbolic_expression (op); + + if (loongarch_symbol_extreme_p (symbol_type)) +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + switch (symbol_type) + { +@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (!TARGET_EXPLICIT_RELOCS) +- error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs=always"); ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-mexplicit-relocs=none"); + + if (opts->x_flag_plt) + { +@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (!TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, +- "-mexplicit-relocs=always"); ++ "%qE attribute is not compatible with %s", name, ++ "-mexplicit-relocs=none"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 76a8f20d1..5c6515cb1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space. + + @item extreme + This mode does not limit the size of the code segment and data segment. +-The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and +-@option{-mno-explicit-relocs}. ++The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} ++and/or @option{-mexplicit-relocs=none}. + @end table + The default code model is @code{normal}. 
+ +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +new file mode 100644 +index 000000000..5622d5086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +new file mode 100644 +index 000000000..482724bb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +new file mode 100644 +index 000000000..a4da44b4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +new file mode 100644 +index 000000000..16b00f4c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +-- +2.43.0 +
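With the restriction relaxed, code like the following now builds under -mexplicit-relocs=auto instead of demanding -mexplicit-relocs=always; a small sketch of my own, separate from the bundled attr-model tests:

    /* Compile with: -O2 -mexplicit-relocs=auto -mcmodel=normal  */
    int far_counter __attribute__ ((model ("extreme")));

    int
    bump (void)
    {
      /* The per-symbol "extreme" model forces the full 64-bit
         %pc_hi20/%pc_lo12/%pc64_lo20/%pc64_hi12 sequence, which is now
         emitted as explicit relocs even under -mexplicit-relocs=auto.  */
      return ++far_counter;
    }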
_service:tar_scm:0068-LoongArch-Fix-warnings-building-libgcc.patch
Added
@@ -0,0 +1,79 @@ +From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 22:08:37 +0800 +Subject: PATCH 068/188 LoongArch: Fix warnings building libgcc + +We are excluding loongarch-opts.h from target libraries, but now struct +loongarch_target and gcc_options are not declared in the target +libraries, causing: + +In file included from ../.././gcc/options.h:8, + from ../.././gcc/tm.h:49, + from ../../../gcc/libgcc/fixed-bit.c:48: +../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41: +warning: 'struct gcc_options' declared inside parameter list will not +be visible outside of this definition or declaration + 57 | struct gcc_options *opts, + | ^~~~~~~~~~~ + +So exclude the declarations referring to the C++ structs as well. + +gcc/ChangeLog: + + * config/loongarch/loongarch-opts.h (la_target): Move into #if + for loongarch-def.h. + (loongarch_init_target): Likewise. + (loongarch_config_target): Likewise. + (loongarch_update_gcc_opt_status): Likewise. +--- + gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 7010ddfec..639ed50bd 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + +-/* This is a C++ header and it shouldn't be used by target libraries. */ ++/* The loongarch-def.h file is a C++ header and it shouldn't be used by ++ target libraries. Exclude it and everything using the C++ structs ++ (struct loongarch_target and gcc_options) from target libraries. */ + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" +-#endif + + /* Target configuration */ + extern struct loongarch_target la_target; + +-/* Flag status */ +-struct loongarch_flags { +- int flt; const char* flt_str; +-#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) +- int sx2; +-}; +- +- + /* Initialize loongarch_target from separate option variables. */ + void + loongarch_init_target (struct loongarch_target *target, +@@ -56,7 +49,14 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++#endif + ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx2; ++}; + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ + #define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) +-- +2.43.0 +
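The shape of the fix generalizes to any mixed C/C++ header: declarations that mention C++-only types must sit under the same guard that hides the C++ header, while plain-C pieces stay visible to target libraries. An illustrative header skeleton (names are made up, not GCC's):

    /* hypothetical-opts.h -- same guard pattern as loongarch-opts.h */
    #if !defined (IN_LIBGCC2) && !defined (IN_TARGET_LIBS) && !defined (IN_RTS)
    #include "cxx-defs.h"                     /* C++-only definitions */
    void init_target (struct gcc_options *);  /* refers to a C++ struct */
    #endif

    /* Plain-C declarations remain visible when building libgcc etc.  */
    struct plain_flags { int flt; const char *flt_str; };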
_service:tar_scm:0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
Added
@@ -0,0 +1,30 @@ +From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 13 Dec 2023 02:39:35 +0800 +Subject: PATCH 069/188 LoongArch: testsuite: Remove XFAIL in + vect-ftint-no-inexact.c + +After r14-6455 this no longer fails. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove. +--- + gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +index 83d268099..61918beef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -39,6 +39,5 @@ + /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ + /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ + +-/* trunc: XFAIL due to PR 107723 */ +-/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ + /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 +
_service:tar_scm:0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
Added
@@ -0,0 +1,44 @@ +From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 15:27:28 +0800 +Subject: PATCH 070/188 LoongArch: Include rtl.h for COSTS_N_INSNS instead of + hard coding our own + +With loongarch-def.cc switched from C to C++, we can include rtl.h for +COSTS_N_INSNS, instead of hard coding our own. + +This is a non-functional change for now, but it will make the code more +future-proof in case COSTS_N_INSNS in rtl.h is ever changed. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.cc (rtl.h): Include. + (COSTS_N_INSNS): Remove the macro definition. +--- + gcc/config/loongarch/loongarch-def.cc | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index c41804a18..6217b1926 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + #include "system.h" + #include "coretypes.h" + #include "tm.h" ++#include "rtl.h" + + #include "loongarch-def.h" + #include "loongarch-str.h" +@@ -89,8 +90,6 @@ array_tune<loongarch_align> loongarch_cpu_align = + .set (CPU_LA464, la464_align ()) + .set (CPU_LA664, la464_align ()); + +-#define COSTS_N_INSNS(N) ((N) * 4) + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () + : fp_add (COSTS_N_INSNS (1)), +-- +2.43.0 +
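For reference, the macro being de-duplicated just scales an instruction count into GCC's cost units; a trivial standalone sketch of the arithmetic (the authoritative definition lives in rtl.h):

    #include <stdio.h>

    /* Same formula as the removed local copy; keeping a single
       definition in rtl.h means the backend follows along if the
       scale factor ever changes.  */
    #define COSTS_N_INSNS(n) ((n) * 4)

    int
    main (void)
    {
      printf ("%d %d\n", COSTS_N_INSNS (1), COSTS_N_INSNS (5));  /* 4 20 */
      return 0;
    }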
_service:tar_scm:0071-LoongArch-Fix-instruction-costs-PR112936.patch
Added
@@ -0,0 +1,165 @@ +From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 17:41:32 +0800 +Subject: PATCH 071/188 LoongArch: Fix instruction costs PR112936 + +Replace the instruction costs in loongarch_rtx_cost_data constructor +based on micro-benchmark results on LA464 and LA664. + +This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl +and slli. + +gcc/ChangeLog: + + PR target/112936 + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update + instruction costs per micro-benchmark results. + (loongarch_rtx_cost_optimize_size): Set all instruction costs + to (COSTS_N_INSNS (1) + 1). + * config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove + special case for multiplication when optimizing for size. + Adjust division cost when TARGET_64BIT && !TARGET_DIV32. + Account the extra cost when TARGET_CHECK_ZERO_DIV and + optimizing for speed. + +gcc/testsuite/ChangeLog + + PR target/112936 + * gcc.target/loongarch/mul-const-reduction.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++--------- + gcc/config/loongarch/loongarch.cc | 22 +++++------ + .../loongarch/mul-const-reduction.c | 11 ++++++ + 3 files changed, 43 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6217b1926..4a8885e83 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align = + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +- : fp_add (COSTS_N_INSNS (1)), +- fp_mult_sf (COSTS_N_INSNS (2)), +- fp_mult_df (COSTS_N_INSNS (4)), +- fp_div_sf (COSTS_N_INSNS (6)), ++ : fp_add (COSTS_N_INSNS (5)), ++ fp_mult_sf (COSTS_N_INSNS (5)), ++ fp_mult_df (COSTS_N_INSNS (5)), ++ fp_div_sf (COSTS_N_INSNS (8)), + fp_div_df (COSTS_N_INSNS (8)), +- int_mult_si (COSTS_N_INSNS (1)), +- int_mult_di (COSTS_N_INSNS (1)), +- int_div_si (COSTS_N_INSNS (4)), +- int_div_di (COSTS_N_INSNS (6)), ++ int_mult_si (COSTS_N_INSNS (4)), ++ int_mult_di (COSTS_N_INSNS (4)), ++ int_div_si (COSTS_N_INSNS (5)), ++ int_div_di (COSTS_N_INSNS (5)), + branch_cost (6), + memory_latency (4) {} + +@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = + array_tune<loongarch_rtx_cost_data> (); + +-/* RTX costs to use when optimizing for size. */ ++/* RTX costs to use when optimizing for size. ++ We use a value slightly larger than COSTS_N_INSNS (1) for all of them ++ because they are slower than simple instructions. 
*/ ++#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1) + const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + loongarch_rtx_cost_data () +- .fp_add_ (4) +- .fp_mult_sf_ (4) +- .fp_mult_df_ (4) +- .fp_div_sf_ (4) +- .fp_div_df_ (4) +- .int_mult_si_ (4) +- .int_mult_di_ (4) +- .int_div_si_ (4) +- .int_div_di_ (4); ++ .fp_add_ (COST_COMPLEX_INSN) ++ .fp_mult_sf_ (COST_COMPLEX_INSN) ++ .fp_mult_df_ (COST_COMPLEX_INSN) ++ .fp_div_sf_ (COST_COMPLEX_INSN) ++ .fp_div_df_ (COST_COMPLEX_INSN) ++ .int_mult_si_ (COST_COMPLEX_INSN) ++ .int_mult_di_ (COST_COMPLEX_INSN) ++ .int_div_si_ (COST_COMPLEX_INSN) ++ .int_div_di_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4362149ef..afbb55390 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +- else if (!speed) +- *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else +@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case UDIV: + case UMOD: +- if (!speed) +- { +- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); +- } +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_div_di; + else +- *total = loongarch_cost->int_div_si; ++ { ++ *total = loongarch_cost->int_div_si; ++ if (TARGET_64BIT && !TARGET_DIV32) ++ *total += COSTS_N_INSNS (2); ++ } ++ ++ if (TARGET_CHECK_ZERO_DIV) ++ *total += COSTS_N_INSNS (2); ++ + return false; + + case SIGN_EXTEND: +@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { +- if (!speed) +- *total = COSTS_N_INSNS (1) + 1; +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; +diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +new file mode 100644 +index 000000000..02d9a4876 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464" } */ ++/* { dg-final { scan-assembler "alsl\.w" } } */ ++/* { dg-final { scan-assembler "slli\.w" } } */ ++/* { dg-final { scan-assembler-not "mul\.w" } } */ ++ ++int ++test (int a) ++{ ++ return a * 68; ++} +-- +2.43.0 +
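The mul-const-reduction.c test checks the resulting code; the arithmetic behind it: 68 = 17 * 4 = (1 + 2^4) * 2^2, so with realistic multiply costs the multiplication decomposes into one alsl.w plus one slli.w. A portable C rendering of the same strength reduction:

    int
    mul68 (int x)
    {
      int t = x + (x << 4);   /* x * 17  ->  alsl.w t, x, x, 4 */
      return t << 2;          /* x * 68  ->  slli.w            */
    }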
_service:tar_scm:0072-LoongArch-Add-alslsi3_extend.patch
Added
@@ -0,0 +1,53 @@ +From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 18:02:35 +0800 +Subject: PATCH 072/188 LoongArch: Add alslsi3_extend + +Following the instruction cost fix, we are generating + + alsl.w $a0, $a0, $a0, 4 + +instead of + + li.w $t0, 17 + mul.w $a0, $a0, $t0 + +for "x * 17", because alsl.w is 4 times faster than mul.w. But we didn't +have a sign-extending pattern for alsl.w, causing an extra slli.w +instruction to be generated to sign-extend $a0. Add the pattern to remove the +redundant extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (alslsi3_extend): New + define_insn. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 222f1ae83..23368008e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2874,6 +2874,18 @@ + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) + ++(define_insn "alslsi3_extend" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (plus:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand:SI 3 "register_operand" "r")))) ++ "" ++ "alsl.w\t%0,%1,%3,%2" ++ (set_attr "type" "arith") ++ (set_attr "mode" "SI")) ++ +  + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. +-- +2.43.0 +
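A user-level shape that exercises the new pattern on LP64 (a sketch of mine, not a bundled test): the 32-bit product is consumed in a 64-bit context, so it must arrive sign-extended, which alsl.w can now express on its own:

    long
    mul17_widened (int x)
    {
      /* Previously alsl.w plus a separate slli.w emitted only to
         sign-extend $a0; now a single alsl.w suffices.  */
      return x * 17;
    }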
_service:tar_scm:0073-LoongArch-Add-support-for-D-frontend.patch
Added
@@ -0,0 +1,224 @@ +From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001 +From: liushuyu <liushuyu011@gmail.com> +Date: Mon, 18 Dec 2023 09:52:07 +0800 +Subject: PATCH 073/188 LoongArch: Add support for D frontend. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-d.o to d_target_objs for LoongArch + architecture. + * config/loongarch/t-loongarch: Add object target for loongarch-d.cc. + * config/loongarch/loongarch-d.cc + (loongarch_d_target_versions): add interface function to define builtin + D versions for LoongArch architecture. + (loongarch_d_handle_target_float_abi): add interface function to define + builtin D traits for LoongArch architecture. + (loongarch_d_register_target_info): add interface function to register + loongarch_d_handle_target_float_abi function. + * config/loongarch/loongarch-d.h + (loongarch_d_target_versions): add function prototype. + (loongarch_d_register_target_info): Likewise. + +libphobos/ChangeLog: + + * configure.tgt: Enable libphobos for LoongArch architecture. + * libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for + LoongArch64. + * libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for + LoongArch64. +--- + gcc/config.gcc | 1 + + gcc/config/loongarch/loongarch-d.cc | 77 ++++++++++++++++++++++ + gcc/config/loongarch/loongarch-d.h | 26 ++++++++ + gcc/config/loongarch/t-loongarch | 4 ++ + libphobos/configure.tgt | 3 + + libphobos/libdruntime/gcc/sections/elf.d | 2 + + libphobos/libdruntime/gcc/unwind/generic.d | 1 + + 7 files changed, 114 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch-d.cc + create mode 100644 gcc/config/loongarch/loongarch-d.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 11ab620d0..039187fa2 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -456,6 +456,7 @@ mips*-*-*) + ;; + loongarch*-*-*) + cpu_type=loongarch ++ d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" + extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" +diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc +new file mode 100644 +index 000000000..9ac483c39 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.cc +@@ -0,0 +1,77 @@ ++/* Subroutines for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm_d.h" ++#include "d/d-target.h" ++#include "d/d-target-def.h" ++ ++/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets. 
*/ ++ ++void ++loongarch_d_target_versions (void) ++{ ++ if (TARGET_64BIT) ++ d_add_builtin_version ("LoongArch64"); ++ else ++ d_add_builtin_version ("LoongArch32"); ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_HardFloat"); ++ d_add_builtin_version ("D_HardFloat"); ++ } ++ else if (TARGET_SOFT_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_SoftFloat"); ++ d_add_builtin_version ("D_SoftFloat"); ++ } ++} ++ ++/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */ ++ ++static tree ++loongarch_d_handle_target_float_abi (void) ++{ ++ const char *abi; ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ abi = "hard"; ++ else if (TARGET_SOFT_FLOAT_ABI) ++ abi = "soft"; ++ else ++ abi = ""; ++ ++ return build_string_literal (strlen (abi) + 1, abi); ++} ++ ++/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. */ ++ ++void ++loongarch_d_register_target_info (void) ++{ ++ const struct d_target_info_spec handlers = { ++ {"floatAbi", loongarch_d_handle_target_float_abi}, ++ {NULL, NULL}, ++ }; ++ ++ d_add_target_info_handlers (handlers); ++} +diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h +new file mode 100644 +index 000000000..a2fb8d51d +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.h +@@ -0,0 +1,26 @@ ++/* Definitions for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* Defined in loongarch-d.cc */ ++extern void ++loongarch_d_target_versions (void); ++extern void ++loongarch_d_register_target_info (void); ++ ++/* Target hooks for D language. 
*/ ++#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index a1a40431f..994f4d19c 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + ++loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt +index 0063dd232..dcb1551cd 100644 +--- a/libphobos/configure.tgt ++++ b/libphobos/configure.tgt +@@ -36,6 +36,9 @@ case "${target}" in + hppa-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; ++ loongarch*-*-linux*) ++ LIBPHOBOS_SUPPORTED=yes ++ ;; + mips*-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; +diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d +index 5819811f3..bc993ea49 100644 +--- a/libphobos/libdruntime/gcc/sections/elf.d
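With this in place, D code can branch on the new predefined identifiers -- version (LoongArch64), version (LoongArch_HardFloat) / version (LoongArch_SoftFloat) alongside D_HardFloat / D_SoftFloat -- and can query __traits(getTargetInfo, "floatAbi"), which the handler above answers with "hard" or "soft".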
_service:tar_scm:0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
Added
@@ -0,0 +1,156 @@ +From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:09:41 +0800 +Subject: PATCH 074/188 libruntime: Add fiber context switch code for + LoongArch. + +libphobos/ChangeLog: + + * libdruntime/config/loongarch/switchcontext.S: New file. +--- + .../config/loongarch/switchcontext.S | 133 ++++++++++++++++++ + 1 file changed, 133 insertions(+) + create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S + +diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S +new file mode 100644 +index 000000000..edfb9b67e +--- /dev/null ++++ b/libphobos/libdruntime/config/loongarch/switchcontext.S +@@ -0,0 +1,133 @@ ++/* LoongArch support code for fibers and multithreading. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "../common/threadasm.S" ++ ++/** ++ * Performs a context switch. ++ * ++ * $a0 - void** - ptr to old stack pointer ++ * $a1 - void* - new stack pointer ++ * ++ */ ++ ++#if defined(__loongarch_lp64) ++# define GPR_L ld.d ++# define GPR_S st.d ++# define SZ_GPR 8 ++# define ADDSP(si) addi.d $sp, $sp, si ++#elif defined(__loongarch64_ilp32) ++# define GPR_L ld.w ++# define GPR_S st.w ++# define SZ_GPR 4 ++# define ADDSP(si) addi.w $sp, $sp, si ++#else ++# error Unsupported GPR size (must be 64-bit or 32-bit). ++#endif ++ ++#if defined(__loongarch_double_float) ++# define FPR_L fld.d ++# define FPR_S fst.d ++# define SZ_FPR 8 ++#elif defined(__loongarch_single_float) ++# define FPR_L fld.s ++# define FPR_S fst.s ++# define SZ_FPR 4 ++#else ++# define SZ_FPR 0 ++#endif ++ ++ .text ++ .align 2 ++ .global fiber_switchContext ++ .type fiber_switchContext, @function ++fiber_switchContext: ++ .cfi_startproc ++ ADDSP(-11 * SZ_GPR) ++ ++ // fp regs and return address are stored below the stack ++ // because we don't want the GC to scan them. 
++ ++ // return address (r1) ++ GPR_S $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ // callee-saved scratch FPRs (f24-f31) ++ FPR_S $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_S $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_S $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_S $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_S $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_S $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_S $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_S $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ // callee-saved GPRs (r21, fp (r22), r23-r31) ++ GPR_S $r21, $sp, 0*SZ_GPR ++ GPR_S $fp, $sp, 1*SZ_GPR ++ GPR_S $s0, $sp, 2*SZ_GPR ++ GPR_S $s1, $sp, 3*SZ_GPR ++ GPR_S $s2, $sp, 4*SZ_GPR ++ GPR_S $s3, $sp, 5*SZ_GPR ++ GPR_S $s4, $sp, 6*SZ_GPR ++ GPR_S $s5, $sp, 7*SZ_GPR ++ GPR_S $s6, $sp, 8*SZ_GPR ++ GPR_S $s7, $sp, 9*SZ_GPR ++ GPR_S $s8, $sp, 10*SZ_GPR ++ ++ // swap stack pointer ++ GPR_S $sp, $a0, 0 ++ move $sp, $a1 ++ ++ GPR_L $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ FPR_L $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_L $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_L $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_L $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_L $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_L $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_L $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_L $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ GPR_L $r21, $sp, 0*SZ_GPR ++ GPR_L $fp, $sp, 1*SZ_GPR ++ GPR_L $s0, $sp, 2*SZ_GPR ++ GPR_L $s1, $sp, 3*SZ_GPR ++ GPR_L $s2, $sp, 4*SZ_GPR ++ GPR_L $s3, $sp, 5*SZ_GPR ++ GPR_L $s4, $sp, 6*SZ_GPR ++ GPR_L $s5, $sp, 7*SZ_GPR ++ GPR_L $s6, $sp, 8*SZ_GPR ++ GPR_L $s7, $sp, 9*SZ_GPR ++ GPR_L $s8, $sp, 10*SZ_GPR ++ ++ ADDSP(11 * SZ_GPR) ++ ++ jr $r1 // return ++ .cfi_endproc ++ .size fiber_switchContext,.-fiber_switchContext +-- +2.43.0 +
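A hedged sketch of the C-level contract the D runtime relies on (the prototype is inferred from the comment block in the file; preparing a fiber's initial stack is runtime-specific and elided here):

    /* a0 = where to store the old stack pointer, a1 = stack to resume.  */
    extern void fiber_switchContext (void **oldp, void *newp);

    static void *scheduler_sp;
    static void *fiber_sp;   /* assumed: points at a prepared fiber stack */

    static void
    resume_fiber (void)
    {
      /* Saves callee-saved state on the current stack, stores the old
         SP to scheduler_sp, then resumes from fiber_sp.  */
      fiber_switchContext (&scheduler_sp, fiber_sp);
    }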
_service:tar_scm:0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
Added
@@ -0,0 +1,866 @@ +From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 01:09:20 +0800 +Subject: PATCH 075/188 LoongArch: Fix FP vector comparsons PR113034 + +We had the following mappings between <x>vfcmp submenmonics and RTX +codes: + + (define_code_attr fcc + (unordered "cun") + (ordered "cor") + (eq "ceq") + (ne "cne") + (uneq "cueq") + (unle "cule") + (unlt "cult") + (le "cle") + (lt "clt")) + +This is inconsistent with scalar code: + + (define_code_attr fcond (unordered "cun") + (uneq "cueq") + (unlt "cult") + (unle "cule") + (eq "ceq") + (lt "slt") + (le "sle") + (ordered "cor") + (ltgt "sne") + (ne "cune") + (ge "sge") + (gt "sgt") + (unge "cuge") + (ungt "cugt")) + +For every RTX code for which the LSX/LASX code is different from the +scalar code, the scalar code is correct and the LSX/LASX code is wrong. +Most seriously, the RTX code NE should be mapped to "cneq", not "cne". +Rewrite <x>vfcmp define_insns in simd.md using the same mapping as +scalar fcmp. + +Note that GAS does not support xvfcmp.{c/s}u{ge/gt} (pseudo) +instruction (although fcmp.{c/s}u{ge/gt} is supported), so we need to +switch the order of inputs and use xvfcmp.{c/s}u{le/lt} instead. + +The <x>vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single +RTX code, but they can be modeled as an inversed RTX code following a +"not" operation. Doing so allows the compiler to optimized vectorized +__builtin_isless etc. to a single instruction. This optimization should +be added for scalar code too and I'll do it later. + +Tests are added for mapping between C code, IEC 60559 operations, and +vfcmp instructions. + +1:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html + +gcc/ChangeLog: + + PR target/113034 + * config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove. + (lasx_xvfcmp_caf_<flasxfmt>): Remove. + (lasx_xvfcmp_cune_<FLASX:flasxfmt>): Remove. + (FSC256_UNS): Remove. + (fsc256): Remove. + (lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>): Remove. + (lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>): Remove. + * config/loongarch/lsx.md (UNSPEC_LSX_XVFCMP_*): Remove. + (lsx_vfcmp_caf_<flsxfmt>): Remove. + (lsx_vfcmp_cune_<FLSX:flsxfmt>): Remove. + (vfcond): Remove. + (fcc): Remove. + (FSC_UNS): Remove. + (fsc): Remove. + (lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>): Remove. + (lsx_vfcmp_<fsc>_<FLSX:flsxfmt>): Remove. + * config/loongarch/simd.md + (fcond_simd): New define_code_iterator. + (<simd_isa>_<x>vfcmp_<fcond:fcond_simd>_<simdfmt>): + New define_insn. + (fcond_simd_rev): New define_code_iterator. + (fcond_rev_asm): New define_code_attr. + (<simd_isa>_<x>vfcmp_<fcond:fcond_simd_rev>_<simdfmt>): + New define_insn. + (fcond_inv): New define_code_iterator. + (fcond_inv_rev): New define_code_iterator. + (fcond_inv_rev_asm): New define_code_attr. + (<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>): New define_insn. + (<simd_isa>_<x>vfcmp_<fcond_inv:fcond_inv_rev>_<simdfmt>): + New define_insn. + (UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF, + UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN, + UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE, + UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs. + (SIMD_FCMP): New define_int_iterator. + (fcond_unspec): New define_int_attr. + (<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>): New define_insn. + * config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp): + Remove unneeded special cases. + +gcc/testsuite/ChangeLog: + + PR target/113034 + * gcc.target/loongarch/vfcmp-f.c: New test. 
+ * gcc.target/loongarch/vfcmp-d.c: New test. + * gcc.target/loongarch/xvfcmp-f.c: New test. + * gcc.target/loongarch/xvfcmp-d.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune + instead of cne. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise. +--- + gcc/config/loongarch/lasx.md | 76 -------- + gcc/config/loongarch/loongarch.cc | 60 +----- + gcc/config/loongarch/lsx.md | 83 -------- + gcc/config/loongarch/simd.md | 118 ++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 4 +- + .../loongarch/vector/lsx/lsx-vcond-2.c | 4 +- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 28 +++ + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 178 ++++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 29 +++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 27 +++ + 10 files changed, 385 insertions(+), 222 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index eeac8cd98..921ce0eeb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -32,9 +32,7 @@ + UNSPEC_LASX_XVBITREVI + UNSPEC_LASX_XVBITSET + UNSPEC_LASX_XVBITSETI +- UNSPEC_LASX_XVFCMP_CAF + UNSPEC_LASX_XVFCLASS +- UNSPEC_LASX_XVFCMP_CUNE + UNSPEC_LASX_XVFCVT + UNSPEC_LASX_XVFCVTH + UNSPEC_LASX_XVFCVTL +@@ -44,17 +42,6 @@ + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE +- UNSPEC_LASX_XVFCMP_SAF +- UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT +- UNSPEC_LASX_XVFCMP_SNE +- UNSPEC_LASX_XVFCMP_SOR +- UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT +- UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SUNE + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -1481,69 +1468,6 @@ + (set_attr "type" "simd_fclass") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfcmp_caf_<flasxfmt>" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f") +- UNSPEC_LASX_XVFCMP_CAF)) +- "ISA_HAS_LASX" +- "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_fcmp") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f") +- UNSPEC_LASX_XVFCMP_CUNE)) +- "ISA_HAS_LASX" +- "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_fcmp") +- (set_attr "mode" "<MODE>")) +- +- +- +-(define_int_iterator FSC256_UNS UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT) +- +-(define_int_attr fsc256 +- (UNSPEC_LASX_XVFCMP_SAF "saf") +- (UNSPEC_LASX_XVFCMP_SUN "sun") +- (UNSPEC_LASX_XVFCMP_SOR "sor") +- (UNSPEC_LASX_XVFCMP_SEQ "seq") +- (UNSPEC_LASX_XVFCMP_SNE "sne") +- (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
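The most consequential of the wrong mappings is NE: in C, x != y is true when either operand is NaN (the unordered case), which is exactly cune, "compare unordered or not-equal"; cne is the ordered not-equal that corresponds to LTGT / islessgreater. A scalar illustration of the distinction:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double n = NAN;
      /* != must be true on unordered operands: cune semantics.  */
      printf ("NAN != NAN    -> %d\n", n != n);                /* 1 */
      /* islessgreater is the ordered not-equal: cne/sne semantics.  */
      printf ("islessgreater -> %d\n", islessgreater (n, n));  /* 0 */
      return 0;
    }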
_service:tar_scm:0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
Added
@@ -0,0 +1,190 @@ +From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 19 Dec 2023 04:48:03 +0800 +Subject: PATCH 076/188 LoongArch: Use force_reg instead of gen_reg_rtx + + emit_move_insn in vec_init expander PR113033 + +Jakub says: + + Then that seems like a bug in the loongarch vec_init pattern(s). + Those really don't have a predicate in any of the backends on the + input operand, so they need to force_reg it if it is something it + can't handle. I've looked e.g. at i386 vec_init and that is exactly + what it does, see the various tests + force_reg calls in + ix86_expand_vector_init*. + +So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033. + +gcc/ChangeLog: + + PR target/113033 + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Replace gen_reg_rtx + + emit_move_insn with force_reg. + (loongarch_expand_vector_init): Likewise. + +gcc/testsuite/ChangeLog: + + PR target/113033 + * gcc.target/loongarch/pr113033.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 38 ++++++------------- + gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++ + 2 files changed, 35 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index a22601d88..000d2d623 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + gcc_unreachable (); + } + } +- temp = gen_reg_rtx (imode); ++ + if (imode == GET_MODE (same)) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + else + temp2 = lowpart_subreg (imode, same, GET_MODE (same)); + } +- emit_move_insn (temp, temp2); ++ ++ temp = force_reg (imode, temp2); + + switch (vmode) + { +@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals) + to reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi0); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val_hi1); ++ op0 = force_reg (imode, val_hi0); ++ op1 = force_reg (imode, val_hi1); + emit_insn ( + loongarch_vec_repl2_256 (target_hi, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hii); ++ op0 = force_reg (imode, val_hii); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } + } + else + { ++ op0 = force_reg (imode, val_hii); + /* Assign the lowest element of val_hi to all elements + of target_hi. */ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi0); + emit_insn (loongarch_vec_repl1_256 (target_hi, op0)); + } + else if (!rtx_equal_p (val_hii, val_hi0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hii); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } +@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + if (!lo_same && !half_same) + { ++ op0 = force_reg (imode, val_loi); + /* Assign the lowest element of val_lo to all elements + of target_lo. 
*/ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_lo0); + emit_insn (loongarch_vec_repl1_128 (target_lo, op0)); + } + else if (!rtx_equal_p (val_loi, val_lo0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_loi); + emit_insn ( + loongarch_vec_set128 (target_lo, op0, GEN_INT (i))); + } +@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals) + reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val0); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val1); ++ op0 = force_reg (imode, val0); ++ op1 = force_reg (imode, val1); + emit_insn (loongarch_vec_repl2_128 (target, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, vali); ++ op0 = force_reg (imode, vali); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + loongarch_vec_mirror (target, target, const0_rtx)); + return; + } ++ op0 = force_reg (imode, vali); + /* Assign the lowest element of val to all elements of + target. */ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val0); + emit_insn (loongarch_vec_repl1_128 (target, op0)); + } + else if (!rtx_equal_p (vali, val0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, vali); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c +new file mode 100644 +index 000000000..4ccd037d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c +@@ -0,0 +1,23 @@ ++/* PR target/113033: ICE with vector left rotate */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++ ++typedef unsigned __attribute__ ((vector_size (16))) v4si; ++typedef unsigned __attribute__ ((vector_size (32))) v8si; ++typedef unsigned long long __attribute__ ((vector_size (16))) v2di; ++typedef unsigned long long __attribute__ ((vector_size (32))) v4di; ++ ++#define TEST(tp) \ ++extern tp data_##tp; \ ++tp \ ++test_##tp (int x) \ ++{ \ ++ const int bit = sizeof (data_##tp0) * __CHAR_BIT__; \ ++ data_##tp = data_##tp << (x & (bit - 1)) \ ++ | data_##tp >> (bit - x & (bit - 1)); \ ++} ++ ++TEST (v4si) ++TEST (v8si) ++TEST (v2di) ++TEST (v4di) +-- +2.43.0 +
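The core of the fix above is a one-call idiom. A hedged sketch of the transformation, using the real GCC internal APIs force_reg, gen_reg_rtx and emit_move_insn, but with a hypothetical helper name of mine:

/* Before: allocate a pseudo, then move VAL into it; this can ICE when
   VAL is an expression the move pattern cannot handle directly.  */
static rtx
legitimize_elt_old (machine_mode imode, rtx val)
{
  rtx tmp = gen_reg_rtx (imode);
  emit_move_insn (tmp, val);
  return tmp;
}

/* After: force_reg is a no-op for values already in a register and
   otherwise emits whatever sequence is needed to get VAL into one.  */
static rtx
legitimize_elt (machine_mode imode, rtx val)
{
  return force_reg (imode, val);
}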
View file
_service:tar_scm:0077-LoongArch-Clean-up-vec_init-expander.patch
Added
@@ -0,0 +1,83 @@ +From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 19 Dec 2023 05:02:42 +0800 +Subject: PATCH 077/188 LoongArch: Clean up vec_init expander + +Non functional change, clean up the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Remove "temp2" and reuse + "temp" instead. + (loongarch_expand_vector_init): Use gcc_unreachable () instead + of gcc_assert (0), and fix the comment for it. +--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 000d2d623..3aeafeafd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx temp; + + if (CONST_INT_P (same) && nvar == 0 + && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + } + + if (imode == GET_MODE (same)) +- temp2 = same; ++ temp = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { + if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); + } + else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); ++ temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } + else + { +@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ temp = lowpart_subreg (imode, same, GET_MODE (same)); + } + +- temp = force_reg (imode, temp2); ++ temp = force_reg (imode, temp); + + switch (vmode) + { +@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + return; + } + +- /* Loongson is the only cpu with vectors with more elements. */ +- gcc_assert (0); ++ /* No LoongArch CPU supports vectors with more elements as at now. */ ++ gcc_unreachable (); + } + + /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ +-- +2.43.0 +
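On the gcc_assert (0) to gcc_unreachable () change: both abort in checking builds, but gcc_unreachable () additionally tells the compiler the path is dead. Illustrative sketch only (gcc_unreachable is the GCC-internal macro, my example otherwise):

int
vector_half_elts (int nelts)
{
  switch (nelts)
    {
    case 16: return 8;
    case 32: return 16;
    default:
      /* Marks the path unreachable, so no "control reaches end of
	 non-void function" fallout and no dead error-path code.  */
      gcc_unreachable ();
    }
}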
View file
_service:tar_scm:0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
Added
@@ -0,0 +1,78 @@ +From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 14 Dec 2023 20:49:04 +0800 +Subject: [PATCH 078/188] LoongArch: Fix incorrect code generation for sad + pattern + +When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c +and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad +code. This patch to fixed them, for sad patterns, use zero expansion instead of sign +expansion for reduction. + +Currently, we are fixing failed vectorized tests, and in the future, we will +enable more tests of "vect" for LoongArch. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use zero expansion instruction. + * config/loongarch/lsx.md: Ditto. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/lsx.md | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 921ce0eeb..9ca3f9278 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5021,8 +5021,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -5038,8 +5038,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 57e0ee3d4..7f5fff40a 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -3385,8 +3385,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -3402,8 +3402,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +-- +2.43.0 +
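A worked example (mine, not from the patch) of why the widening steps of the SAD reduction must use the unsigned forms: the absolute difference of two unsigned bytes is already a non-negative 8-bit value, so sign-extending it corrupts the accumulation.

int
sad_term (unsigned char a, unsigned char b)
{
  unsigned char d = a > b ? a - b : b - a;  /* what [x]vabsd.bu computes */
  int zext = d;                   /* vhaddw.hu.bu view: 0xE0 -> 224 */
  int sext = (signed char) d;     /* vhaddw.h.b view:  0xE0 -> -32  */
  return zext - sext;             /* nonzero for d >= 0x80: wrong sums */
}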
View file
_service:tar_scm:0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
Added
@@ -0,0 +1,68 @@ +From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 13 Dec 2023 09:31:07 +0800 +Subject: [PATCH 079/188] LoongArch: Modify the check type of the vector + builtin function. + +On LoongArch architecture, using the latest gcc14 in regression test, +it is found that the vector test cases in vector directory appear FAIL +entries with unmatched pointer types. In order to solve this kind of +problem, the type of the variable in the check result is modified with +the parameter type defined in the vector builtin function. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/simd_correctness_check.h:The variable + types in the check results are modified in conjunction with the + parameter types defined in the vector builtin function. +--- + .../loongarch/vector/simd_correctness_check.h | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +index eb7fbd59c..551340bd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h ++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +@@ -8,11 +8,12 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- long *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ long long *temp_ref = (long long *)&ref[i], \ ++ *temp_res = (long long *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%lx, got: 0x%lx\n", \ ++ "%ld:0x%016lx, got: 0x%016lx\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -28,11 +29,11 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- int *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ int *temp_ref = (int *)&ref[i], *temp_res = (int *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%x, got: 0x%x\n", \ ++ "%ld:0x%08x, got: 0x%08x\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -47,8 +48,8 @@ + { \ + if (ref != res) \ + { \ +- printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \ +- line, ref, res); \ ++ printf (" error: %s at line %ld , expected 0x:%016x", \ ++ "got 0x:%016x\n", __FILE__, line, ref, res); \ + } \ + } \ + while (0) +-- +2.43.0 +
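The corrected macros compare lanes bit-for-bit through an integer view of matching width. A standalone sketch of the same pattern in portable C (assumed function and names of mine, not the header itself):

#include <stdio.h>
#include <string.h>

int
check_d (const double *ref, const double *res, int n)
{
  int fail = 0;
  for (int i = 0; i < n; i++)
    {
      long long r, s;                   /* 64 bits wide on any ABI    */
      memcpy (&r, &ref[i], sizeof r);   /* bit-exact, no FP compare   */
      memcpy (&s, &res[i], sizeof s);
      if (r != s)
	{
	  printf ("lane %d: expected 0x%016llx, got 0x%016llx\n", i, r, s);
	  fail = 1;
	}
    }
  return fail;
}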
View file
_service:tar_scm:0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
Added
@@ -0,0 +1,250 @@ +From 2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001 +From: Jiajie Chen <c@jia.je> +Date: Wed, 13 Dec 2023 23:26:01 +0800 +Subject: PATCH 080/188 LoongArch: extend.texi: Fix typos in LSX intrinsics + +Several typos have been found and fixed: missing semicolons, using +variable name instead of type, duplicate functions and wrong types. + +gcc/ChangeLog: + + * doc/extend.texi(__lsx_vabsd_di): remove extra `i' in name. + (__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d, + __lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d, + __lsx_vfrintrz_s): fix return types. + (__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d, + __lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d, + __lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d, + __lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q, + __lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q, + __lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q, + __lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q, + __lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon. + (__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return + type. + (__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h, + __lsx_vstelm_w): use imm type for the last argument. + (__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w, + __lsx_vsigncov_d): remove duplicate definitions. +--- + gcc/doc/extend.texi | 90 ++++++++++++++++++++++----------------------- + 1 file changed, 43 insertions(+), 47 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index bb042ae78..ac8da4e80 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i); + int __lsx_bz_w (__m128i); + __m128i __lsx_vabsd_b (__m128i, __m128i); + __m128i __lsx_vabsd_bu (__m128i, __m128i); +-__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_d (__m128i, __m128i); + __m128i __lsx_vabsd_du (__m128i, __m128i); + __m128i __lsx_vabsd_h (__m128i, __m128i); + __m128i __lsx_vabsd_hu (__m128i, __m128i); +@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128); + __m128d __lsx_vfrecip_d (__m128d); + __m128 __lsx_vfrecip_s (__m128); + __m128d __lsx_vfrint_d (__m128d); +-__m128i __lsx_vfrintrm_d (__m128d); +-__m128i __lsx_vfrintrm_s (__m128); +-__m128i __lsx_vfrintrne_d (__m128d); +-__m128i __lsx_vfrintrne_s (__m128); +-__m128i __lsx_vfrintrp_d (__m128d); +-__m128i __lsx_vfrintrp_s (__m128); +-__m128i __lsx_vfrintrz_d (__m128d); +-__m128i __lsx_vfrintrz_s (__m128); ++__m128d __lsx_vfrintrm_d (__m128d); ++__m128 __lsx_vfrintrm_s (__m128); ++__m128d __lsx_vfrintrne_d (__m128d); ++__m128 __lsx_vfrintrne_s (__m128); ++__m128d __lsx_vfrintrp_d (__m128d); ++__m128 __lsx_vfrintrp_s (__m128); ++__m128d __lsx_vfrintrz_d (__m128d); ++__m128 __lsx_vfrintrz_s (__m128); + __m128 __lsx_vfrint_s (__m128); + __m128d __lsx_vfrsqrt_d (__m128d); + __m128 __lsx_vfrsqrt_s (__m128); +@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); + __m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); + __m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7); + __m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); +-__m128i __lsx_vld (void *, imm_n2048_2047) +-__m128i __lsx_vldi (imm_n1024_1023) +-__m128i __lsx_vldrepl_b (void *, imm_n2048_2047) +-__m128i __lsx_vldrepl_d (void *, imm_n256_255) +-__m128i __lsx_vldrepl_h (void *, imm_n1024_1023) +-__m128i __lsx_vldrepl_w (void *, imm_n512_511) ++__m128i __lsx_vld (void *, imm_n2048_2047); 
++__m128i __lsx_vldi (imm_n1024_1023); ++__m128i __lsx_vldrepl_b (void *, imm_n2048_2047); ++__m128i __lsx_vldrepl_d (void *, imm_n256_255); ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023); ++__m128i __lsx_vldrepl_w (void *, imm_n512_511); + __m128i __lsx_vldx (void *, long int); + __m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); + __m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); +@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i); + __m128i __lsx_vmax_du (__m128i, __m128i); + __m128i __lsx_vmax_h (__m128i, __m128i); + __m128i __lsx_vmax_hu (__m128i, __m128i); +-__m128i __lsx_vmaxi_b (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_bu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_d (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_du (__m128i, imm0_31); +-__m128i __lsx_vmaxi_h (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_hu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_w (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_wu (__m128i, imm0_31); + __m128i __lsx_vmax_w (__m128i, __m128i); + __m128i __lsx_vmax_wu (__m128i, __m128i); +@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i); + __m128i __lsx_vmin_du (__m128i, __m128i); + __m128i __lsx_vmin_h (__m128i, __m128i); + __m128i __lsx_vmin_hu (__m128i, __m128i); +-__m128i __lsx_vmini_b (__m128i, imm_n16_15) ++__m128i __lsx_vmini_b (__m128i, imm_n16_15); + __m128i __lsx_vmini_bu (__m128i, imm0_31); +-__m128i __lsx_vmini_d (__m128i, imm_n16_15) ++__m128i __lsx_vmini_d (__m128i, imm_n16_15); + __m128i __lsx_vmini_du (__m128i, imm0_31); +-__m128i __lsx_vmini_h (__m128i, imm_n16_15) ++__m128i __lsx_vmini_h (__m128i, imm_n16_15); + __m128i __lsx_vmini_hu (__m128i, imm0_31); +-__m128i __lsx_vmini_w (__m128i, imm_n16_15) ++__m128i __lsx_vmini_w (__m128i, imm_n16_15); + __m128i __lsx_vmini_wu (__m128i, imm0_31); + __m128i __lsx_vmin_w (__m128i, __m128i); + __m128i __lsx_vmin_wu (__m128i, __m128i); +@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i); + __m128i __lsx_vpickod_h (__m128i, __m128i); + __m128i __lsx_vpickod_w (__m128i, __m128i); + int __lsx_vpickve2gr_b (__m128i, imm0_15); +-unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15); ++unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15); + long int __lsx_vpickve2gr_d (__m128i, imm0_1); + unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); + int __lsx_vpickve2gr_h (__m128i, imm0_7); +-unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); + int __lsx_vpickve2gr_w (__m128i, imm0_3); + unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); + __m128i __lsx_vreplgr2vr_b (int); +@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i); + __m128i __lsx_vsigncov_d (__m128i, __m128i); + __m128i __lsx_vsigncov_h (__m128i, __m128i); + __m128i __lsx_vsigncov_w (__m128i, __m128i); +-__m128i __lsx_vsigncov_b (__m128i, __m128i); +-__m128i __lsx_vsigncov_d (__m128i, __m128i); +-__m128i __lsx_vsigncov_h (__m128i, __m128i); +-__m128i __lsx_vsigncov_w (__m128i, __m128i); + __m128i __lsx_vsle_b (__m128i, __m128i); + __m128i __lsx_vsle_bu (__m128i, __m128i); + __m128i __lsx_vsle_d (__m128i, __m128i); +@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31); + __m128i __lsx_vsran_b_h (__m128i, __m128i); + __m128i __lsx_vsran_h_w (__m128i, __m128i); + __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrani_d_q 
(__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsran_w_d (__m128i, __m128i); +@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w (__m128i, imm0_31); + __m128i __lsx_vsrarn_b_h (__m128i, __m128i); + __m128i __lsx_vsrarn_h_w (__m128i, __m128i); + __m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrarn_w_d (__m128i, __m128i); +@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31); + __m128i __lsx_vsrln_b_h (__m128i, __m128i); + __m128i __lsx_vsrln_h_w (__m128i, __m128i); + __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrln_w_d (__m128i, __m128i); +@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31); + __m128i __lsx_vsrlrn_b_h (__m128i, __m128i); + __m128i __lsx_vsrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrlrn_w_d (__m128i, __m128i); +@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i); + __m128i __lsx_vssran_h_w (__m128i, __m128i); + __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); +@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrarn_h_w (__m128i, __m128i); + __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15);
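A usage sketch of one corrected prototype (mine; compile with -mlsx): the rounding intrinsics return floating-point vectors, so with the fixed documentation no cast through __m128i is implied.

#include <lsxintrin.h>

__m128d
round_down (__m128d x)
{
  return __lsx_vfrintrm_d (x);   /* round each lane toward -inf */
}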
View file
_service:tar_scm:0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
Added
@@ -0,0 +1,60 @@ +From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 19 Dec 2023 16:43:17 +0800 +Subject: PATCH 081/188 LoongArch: Fix builtin function prototypes for LASX + in doc. + +gcc/ChangeLog: + + * doc/extend.texi:According to the documents submitted earlier, + Two problems with function return types and using the actual types + of parameters instead of variable names were found and fixed. +--- + gcc/doc/extend.texi | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ac8da4e80..c793c9c5d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); + __m256d __lasx_xvfrecip_d (__m256d); + __m256 __lasx_xvfrecip_s (__m256); + __m256d __lasx_xvfrint_d (__m256d); +-__m256i __lasx_xvfrintrm_d (__m256d); +-__m256i __lasx_xvfrintrm_s (__m256); +-__m256i __lasx_xvfrintrne_d (__m256d); +-__m256i __lasx_xvfrintrne_s (__m256); +-__m256i __lasx_xvfrintrp_d (__m256d); +-__m256i __lasx_xvfrintrp_s (__m256); +-__m256i __lasx_xvfrintrz_d (__m256d); +-__m256i __lasx_xvfrintrz_s (__m256); ++__m256d __lasx_xvfrintrm_d (__m256d); ++__m256 __lasx_xvfrintrm_s (__m256); ++__m256d __lasx_xvfrintrne_d (__m256d); ++__m256 __lasx_xvfrintrne_s (__m256); ++__m256d __lasx_xvfrintrp_d (__m256d); ++__m256 __lasx_xvfrintrp_s (__m256); ++__m256d __lasx_xvfrintrz_d (__m256d); ++__m256 __lasx_xvfrintrz_s (__m256); + __m256 __lasx_xvfrint_s (__m256); + __m256d __lasx_xvfrsqrt_d (__m256d); + __m256 __lasx_xvfrsqrt_s (__m256); +@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i); + __m256i __lasx_xvssub_w (__m256i, __m256i); + __m256i __lasx_xvssub_wu (__m256i, __m256i); + void __lasx_xvst (__m256i, void *, imm_n2048_2047); +-void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7); + void __lasx_xvstx (__m256i, void *, long int); + __m256i __lasx_xvsub_b (__m256i, __m256i); + __m256i __lasx_xvsub_d (__m256i, __m256i); +-- +2.43.0 +
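A usage sketch for the corrected xvstelm prototypes (mine; compile with -mlasx): the last argument is an immediate lane index whose range depends on the element width (imm0_7 for the eight 32-bit lanes), not an arbitrary idx variable.

#include <lasxintrin.h>

void
store_lane_3 (__m256i v, int *p)
{
  __lasx_xvstelm_w (v, p, 0, 3);   /* store lane 3 at byte offset 0 */
}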
View file
_service:tar_scm:0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
Added
@@ -0,0 +1,92 @@ +From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 5 Dec 2023 14:44:35 +0800 +Subject: PATCH 082/188 LoongArch: Add asm modifiers to the LSX and LASX + directives in the doc. + +gcc/ChangeLog: + + * doc/extend.texi:Add modifiers to the vector of asm in the doc. + * doc/md.texi:Refine the description of the modifier 'f' in the doc. +--- + gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++ + gcc/doc/md.texi | 2 +- + 2 files changed, 47 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index c793c9c5d..bcb9329c2 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch + @item @code{d} @tab Same as @code{c}. + @item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register. + @item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}. ++@item @code{u} @tab Print a LASX register. ++@item @code{w} @tab Print a LSX register. + @item @code{X} @tab Print a constant integer operand in hexadecimal. + @item @code{z} @tab Print the operand in its unmodified form, followed by a comma. + @end multitable + ++References to input and output operands in the assembler template of extended ++asm statements can use modifiers to affect the way the operands are formatted ++in the code output to the assembler. For example, the following code uses the ++'w' modifier for LoongArch: ++ ++@example ++test-asm.c: ++ ++#include <lsxintrin.h> ++ ++__m128i foo (void) ++@{ ++__m128i a,b,c; ++__asm__ ("vadd.d %w0,%w1,%w2\n\t" ++ :"=f" (c) ++ :"f" (a),"f" (b)); ++ ++return c; ++@} ++ ++@end example ++ ++@noindent ++The compile command for the test case is as follows: ++ ++@example ++gcc test-asm.c -mlsx -S -o test-asm.s ++@end example ++ ++@noindent ++The assembly statement produces the following assembly code: ++ ++@example ++vadd.d $vr0,$vr0,$vr1 ++@end example ++ ++This is a 128-bit vector addition instruction, @code{c} (referred to in the ++template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are ++the inputs. @code{__m128i} is a vector data type defined in the file ++@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}). The symbol '=f' ++represents a constraint using a floating-point register as an output type, and ++the 'f' in the input operand represents a constraint using a floating-point ++register operand, which can refer to the definition of a constraint ++(@xref{Constraints}) in gcc. + + @lowersections + @include md.texi +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index b58da0787..a2e839073 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -2750,7 +2750,7 @@ $r1h + @item LoongArch---@file{config/loongarch/constraints.md} + @table @code + @item f +-A floating-point register (if available). ++A floating-point or vector register (if available). + @item k + A memory operand whose address is formed by a base register and + (optionally scaled) index register. +-- +2.43.0 +
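A companion sketch to the documentation example above, using the newly documented 'u' modifier so the 256-bit operands print as $xr registers (my example; compile with -mlasx, analogous to the %w/-mlsx case):

#include <lasxintrin.h>

__m256i
add256 (__m256i a, __m256i b)
{
  __m256i c;
  __asm__ ("xvadd.d %u0,%u1,%u2"
	   : "=f" (c)
	   : "f" (a), "f" (b));
  return c;
}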
View file
_service:tar_scm:0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
Added
@@ -0,0 +1,392 @@ +From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 15 Dec 2023 01:49:40 +0800 +Subject: PATCH 083/188 LoongArch: Implement FCCmode reload and + cstore<ANYF:mode>4 + +We used a branch to load floating-point comparison results into GPR. +This is very slow when the branch is not predictable. + +Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM. +Then implement cstore<ANYF:mode>4. + +gcc/ChangeLog: + + * config/loongarch/loongarch-tune.h + (loongarch_rtx_cost_data::movcf2gr): New field. + (loongarch_rtx_cost_data::movcf2gr_): New method. + (loongarch_rtx_cost_data::use_movcf2gr): New method. + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr + to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based + on timing on LA464. + (loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) for LA664. + (loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) + 1. + * config/loongarch/predicates.md (loongarch_fcmp_operator): New + predicate. + * config/loongarch/loongarch.md (movfcc): Change to + define_expand. + (movfcc_internal): New define_insn. + (fcc_to_<X:mode>): New define_insn. + (cstore<ANYF:mode>4): New define_expand. + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs + and GPRs. + (loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR. + (loongarch_emit_float_compare): Call gen_reg_rtx instead of + loongarch_allocate_fcc. + (loongarch_allocate_fcc): Remove. + (loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS. + (loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS. + (loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS, + FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/movcf2gr.c: New test. + * gcc.target/loongarch/movcf2gr-via-fr.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 13 +++- + gcc/config/loongarch/loongarch-tune.h | 15 +++- + gcc/config/loongarch/loongarch.cc | 70 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 69 ++++++++++++++++-- + gcc/config/loongarch/predicates.md | 4 ++ + .../gcc.target/loongarch/movcf2gr-via-fr.c | 10 +++ + gcc/testsuite/gcc.target/loongarch/movcf2gr.c | 9 +++ + 7 files changed, 157 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 4a8885e83..843be78e4 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + int_mult_di (COSTS_N_INSNS (4)), + int_div_si (COSTS_N_INSNS (5)), + int_div_di (COSTS_N_INSNS (5)), ++ movcf2gr (COSTS_N_INSNS (7)), ++ movgr2cf (COSTS_N_INSNS (15)), + branch_cost (6), + memory_latency (4) {} + + /* The following properties cannot be looked up directly using "cpucfg". + So it is necessary to provide a default value for "unknown native" + tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). Currently all numbers are default. */ ++ any known "-mtune" type). 
*/ + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = +- array_tune<loongarch_rtx_cost_data> (); ++ array_tune<loongarch_rtx_cost_data> () ++ .set (CPU_LA664, ++ loongarch_rtx_cost_data () ++ .movcf2gr_ (COSTS_N_INSNS (1)) ++ .movgr2cf_ (COSTS_N_INSNS (1))); + + /* RTX costs to use when optimizing for size. + We use a value slightly larger than COSTS_N_INSNS (1) for all of them +@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .int_mult_si_ (COST_COMPLEX_INSN) + .int_mult_di_ (COST_COMPLEX_INSN) + .int_div_si_ (COST_COMPLEX_INSN) +- .int_div_di_ (COST_COMPLEX_INSN); ++ .int_div_di_ (COST_COMPLEX_INSN) ++ .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index 616b94e87..26f163f0a 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data + unsigned short int_mult_di; + unsigned short int_div_si; + unsigned short int_div_di; ++ unsigned short movcf2gr; ++ unsigned short movgr2cf; + unsigned short branch_cost; + unsigned short memory_latency; + +@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data + return *this; + } + ++ loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr) ++ { ++ movcf2gr = _movcf2gr; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf) ++ { ++ movgr2cf = _movgr2cf; ++ return *this; ++ } ++ + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) + { + branch_cost = _branch_cost; +@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data + memory_latency = _memory_latency; + return *this; + } +- + }; + + /* Costs to use when optimizing for size. */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3aeafeafd..56f631b1a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. */ +- +-static rtx +-loongarch_allocate_fcc (machine_mode mode) +-{ +- unsigned int regno, count; +- +- gcc_assert (TARGET_HARD_FLOAT); +- +- if (mode == FCCmode) +- count = 1; +- else +- gcc_unreachable (); +- +- cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) +- cfun->machine->next_fcc = 0; +- +- regno = FCC_REG_FIRST + cfun->machine->next_fcc; +- cfun->machine->next_fcc += count; +- return gen_rtx_REG (mode, regno); +-} +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + operands for FCMP.cond.fmt, instead a reversed condition code is + required and a test for false. 
*/ + *code = NE; +- *op0 = loongarch_allocate_fcc (FCCmode); ++ *op0 = gen_reg_rtx (FCCmode); + + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); +@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + enum mode_class mclass; + + if (mode == FCCmode) +- return FCC_REG_P (regno); ++ return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno); + + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); +@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from) + /* MOVFR2GR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movcf2gr; ++ + default:
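What cstore<ANYF:mode>4 changes at the source level (my example): materializing a floating-point comparison into an integer no longer needs a branch over an immediate load; it can be an fcmp followed by a movcf2gr-style transfer, which is why the per-CPU movcf2gr cost added above matters.

int
fp_less (double a, double b)
{
  return a < b;   /* branchless: compare into an FCC, then move to a GPR */
}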
View file
_service:tar_scm:0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
Added
@@ -0,0 +1,69 @@ +From 8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 04:26:23 +0800 +Subject: [PATCH 084/188] LoongArch: Add sign_extend pattern for 32-bit rotate + shift + +Remove a redundant sign extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotrsi3_extend): New + define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotrw.c: New test. +--- + gcc/config/loongarch/loongarch.md | 10 ++++++++++ + gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6cf71d9e4..44e8d336a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2893,6 +2893,16 @@ + [(set_attr "type" "shift,shift") + (set_attr "mode" "<MODE>")]) + ++(define_insn "rotrsi3_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I"))))] ++ "TARGET_64BIT" ++ "rotr%i2.w\t%0,%1,%2" ++ [(set_attr "type" "shift,shift") ++ (set_attr "mode" "SI")]) ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c +new file mode 100644 +index 000000000..6ed45e8b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */ ++/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++unsigned ++rotr (unsigned a, unsigned b) ++{ ++ return a >> b | a << 32 - b; ++} ++ ++unsigned ++rotri (unsigned a) ++{ ++ return a >> 5 | a << 27; ++} +-- +2.43.0 +
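Where the new pattern fires (my sketch): on 64-bit targets a 32-bit rotate result consumed as a sign-extended 64-bit value previously cost an extra extension instruction; rotr.w/rotri.w already produce a sign-extended 64-bit result, so the combined pattern drops it.

long
use64 (unsigned a, unsigned b)
{
  return (int) (a >> b | a << (32 - b));   /* single rotr.w, no slli.w */
}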
View file
_service:tar_scm:0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
Added
@@ -0,0 +1,37 @@ +From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Mon, 25 Dec 2023 11:20:23 +0800 +Subject: [PATCH 085/188] LoongArch: Fixed bug in *bstrins_<mode>_for_ior_mask + template. + +We found that using the latest compiled gcc will cause a miscompare error +when running spec2006 400.perlbench test with -flto turned on. After testing, +it was found that only the LoongArch architecture will report errors. +The first error commit was located through the git bisect command as +r14-3773-g5b857e87201335. Through debugging, it was found that the problem +was that the split condition of the *bstrins_<mode>_for_ior_mask template was +empty, which should actually be consistent with the insn condition. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md: Adjust. +--- + gcc/config/loongarch/loongarch.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 44e8d336a..3d5b75825 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1489,7 +1489,7 @@ + "loongarch_pre_reload_split () && \ + loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" +- "" ++ "&& true" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4)) + (match_dup 3))] +-- +2.43.0 +
View file
_service:tar_scm:0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
Added
@@ -0,0 +1,132 @@ +From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:22:03 +0800 +Subject: PATCH 086/188 LoongArch: Fix insn output of vec_concat templates + for LASX. + +When investigaing failure of gcc.dg/vect/slp-reduc-sad.c, following +instruction block are being generated by vec_concatv32qi (which is +generated by vec_initv32qiv16qi) at entrance of foo() function: + + vldx $vr3,$r5,$r6 + vld $vr2,$r5,0 + xvpermi.q $xr2,$xr3,0x20 + +causes the reversion of vec_initv32qiv16qi operation's high and +low 128-bit part. + +According to other target's similar impl and LSX impl for following +RTL representation, current definition in lasx.md of "vec_concat<mode>" +are wrong: + + (set (op0) (vec_concat (op1) (op2))) + +For correct behavior, the last argument of xvpermi.q should be 0x02 +instead of 0x20. This patch fixes this issue and cleanup the vec_concat +template impl. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_concatv4di): Delete. + (vec_concatv8si): Delete. + (vec_concatv16hi): Delete. + (vec_concatv32qi): Delete. + (vec_concatv4df): Delete. + (vec_concatv8sf): Delete. + (vec_concat<mode>): New template with insn output fixed. +--- + gcc/config/loongarch/lasx.md | 74 ++++-------------------------------- + 1 file changed, 7 insertions(+), 67 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 9ca3f9278..46150f2fb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -577,77 +577,17 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_insn "vec_concatv4di" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (vec_concat:V4DI +- (match_operand:V2DI 1 "register_operand" "0") +- (match_operand:V2DI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv8si" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (vec_concat:V8SI +- (match_operand:V4SI 1 "register_operand" "0") +- (match_operand:V4SI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv16hi" +- (set (match_operand:V16HI 0 "register_operand" "=f") +- (vec_concat:V16HI +- (match_operand:V8HI 1 "register_operand" "0") +- (match_operand:V8HI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv32qi" +- (set (match_operand:V32QI 0 "register_operand" "=f") +- (vec_concat:V32QI +- (match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv4df" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (vec_concat:V4DF +- (match_operand:V2DF 1 "register_operand" "0") +- (match_operand:V2DF 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DF")) +- +-(define_insn "vec_concatv8sf" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (vec_concat:V8SF +- (match_operand:V4SF 1 "register_operand" "0") +- (match_operand:V4SF 2 "register_operand" "f"))) 
++(define_insn "vec_concat<mode>" ++ (set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") ++ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f"))) + "ISA_HAS_LASX" + { +- return "xvpermi.q\t%u0,%u2,0x20"; ++ return "xvpermi.q\t%u0,%u2,0x02"; + } + (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) ++ (set_attr "mode" "<MODE>")) + + ;; xshuf.w + (define_insn "lasx_xvperm_<lasxfmt_f_wd>" +-- +2.43.0 +
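The source-level contract of vec_concat, written out as a shuffle (my example, not the patch's test): the first operand must land in the low 128 bits and the second in the high 128 bits. With the destination tied to operand 1, the 0x02 selector keeps operand 1 low and copies operand 2's low part into the high half, while 0x20 produced the swapped order.

typedef long long v2di __attribute__ ((vector_size (16)));
typedef long long v4di __attribute__ ((vector_size (32)));

v4di
concat (v2di a, v2di b)
{
  /* Result lanes: { a[0], a[1], b[0], b[1] }.  */
  return __builtin_shufflevector (a, b, 0, 1, 2, 3);
}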
View file
_service:tar_scm:0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
Added
@@ -0,0 +1,232 @@ +From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:18:44 +0800 +Subject: PATCH 087/188 LoongArch: Fix ICE when passing two same vector + argument consecutively + +Following code will cause ICE on LoongArch target: + + #include <lsxintrin.h> + + extern void bar (__m128i, __m128i); + + __m128i a; + + void + foo () + { + bar (a, a); + } + +It is caused by missing constraint definition in mov<mode>_lsx. This +patch fixes the template and remove the unnecessary processing from +loongarch_split_move () function. + +This patch also cleanup the redundant definition from +loongarch_split_move () and loongarch_split_move_p (). + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + * config/loongarch/loongarch-protos.h + (loongarch_split_move): Remove unnecessary argument. + (loongarch_split_move_insn_p): Delete. + (loongarch_split_move_insn): Delete. + * config/loongarch/loongarch.cc + (loongarch_split_move_insn_p): Delete. + (loongarch_load_store_insns): Use loongarch_split_move_p + directly. + (loongarch_split_move): remove the unnecessary processing. + (loongarch_split_move_insn): Delete. + * config/loongarch/lsx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test. +--- + gcc/config/loongarch/lasx.md | 4 +- + gcc/config/loongarch/loongarch-protos.h | 4 +- + gcc/config/loongarch/loongarch.cc | 49 +------------------ + gcc/config/loongarch/lsx.md | 10 ++-- + .../loongarch/vector/lsx/lsx-mov-1.c | 14 ++++++ + 5 files changed, 24 insertions(+), 57 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 46150f2fb..dbbf5a136 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -839,10 +839,10 @@ + (set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand")) + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands0, operands1)" ++ && loongarch_split_move_p (operands0, operands1)" + (const_int 0) + { +- loongarch_split_move_insn (operands0, operands1, curr_insn); ++ loongarch_split_move (operands0, operands1); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index e5fcf3111..2067e50c3 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); + extern bool loongarch_split_move_p (rtx, rtx); +-extern void loongarch_split_move (rtx, rtx, rtx); ++extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern bool loongarch_split_move_insn_p (rtx, rtx); +-extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 56f631b1a..5c278386a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2558,7 
+2558,6 @@ loongarch_split_const_insns (rtx x) + return low + high; + } + +-bool loongarch_split_move_insn_p (rtx dest, rtx src); + /* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + { + set = single_set (insn); + if (set +- && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) ++ && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + +@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src) + SPLIT_TYPE describes the split condition. */ + + void +-loongarch_split_move (rtx dest, rtx src, rtx insn_) ++loongarch_split_move (rtx dest, rtx src) + { + rtx low_dest; + +@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_) + loongarch_subword (src, true)); + } + } +- +- /* This is a hack. See if the next insn uses DEST and if so, see if we +- can forward SRC for DEST. This is most useful if the next insn is a +- simple store. */ +- rtx_insn *insn = (rtx_insn *) insn_; +- struct loongarch_address_info addr = {}; +- if (insn) +- { +- rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); +- if (next) +- { +- rtx set = single_set (next); +- if (set && SET_SRC (set) == dest) +- { +- if (MEM_P (src)) +- { +- rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), +- true); +- if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) +- validate_change (next, &SET_SRC (set), src, false); +- } +- else +- validate_change (next, &SET_SRC (set), src, false); +- } +- } +- } + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + op2 = gen_int_mode (v, mode); + } + +-/* Return true if a move from SRC to DEST in INSN should be split. */ +- +-bool +-loongarch_split_move_insn_p (rtx dest, rtx src) +-{ +- return loongarch_split_move_p (dest, src); +-} +- +-/* Split a move from SRC to DEST in INSN, given that +- loongarch_split_move_insn_p holds. */ +- +-void +-loongarch_split_move_insn (rtx dest, rtx src, rtx insn) +-{ +- loongarch_split_move (dest, src, insn); +-} +- + /* Implement TARGET_CONSTANT_ALIGNMENT. */ + + static HOST_WIDE_INT +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 7f5fff40a..3e3248ef4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -794,21 +794,21 @@ + }) + + (define_insn "mov<mode>_lsx" +- (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f") +- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r")) ++ (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") ++ (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r")) + "ISA_HAS_LSX" + { return loongarch_output_move (operands0, operands1); } +- (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") ++ (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") + (set_attr "mode" "<MODE>")) + + (define_split + (set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand"))
View file
_service:tar_scm:0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
Added
@@ -0,0 +1,253 @@ +From a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 05:38:20 +0800 +Subject: PATCH 088/188 LoongArch: Expand left rotate to right rotate with + negated amount + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotl<mode>3): + New define_expand. + * config/loongarch/simd.md (vrotl<mode>3): Likewise. + (rotl<mode>3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotl-with-rotr.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-d.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-d.c: New test. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++ + gcc/config/loongarch/simd.md | 29 +++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-rotr.c | 9 ++++++ + .../gcc.target/loongarch/rotl-with-vrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-w.c | 28 ++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-xvrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-w.c | 7 +++++ + 11 files changed, 127 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3d5b75825..ed4d4b906 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2903,6 +2903,18 @@ + (set_attr "type" "shift,shift") + (set_attr "mode" "SI")) + ++;; Expand left rotate to right rotate. ++(define_expand "rotl<mode>3" ++ (set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:GPR 0 "register_operand") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand") ++ (match_dup 3))) ++ "" ++ { ++ operands3 = gen_reg_rtx (SImode); ++ }); ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 13202f79b..93fb39abc 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -268,6 +268,35 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + ++;; Expand left rotate to right rotate. 
++(define_expand "vrotl<mode>3" ++ (set (match_dup 3) ++ (neg:IVEC (match_operand:IVEC 2 "register_operand"))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 3))) ++ "" ++ { ++ operands3 = gen_reg_rtx (<MODE>mode); ++ }); ++ ++;; Expand left rotate with a scalar amount to right rotate: negate the ++;; scalar before broadcasting it because scalar negation is cheaper than ++;; vector negation. ++(define_expand "rotl<mode>3" ++ (set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_dup 4) ++ (vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 4))) ++ "" ++ { ++ operands3 = gen_reg_rtx (SImode); ++ operands4 = gen_reg_rtx (<MODE>mode); ++ }); ++ + ;; <x>vrotri.{b/h/w/d} + + (define_insn "rotr<mode>3" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +new file mode 100644 +index 000000000..84cc53cec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w" } } */ ++ ++unsigned ++t (unsigned a, unsigned b) ++{ ++ return a << b | a >> (32 - b); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +new file mode 100644 +index 000000000..14298bf9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */ ++ ++#define TYPE char ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +new file mode 100644 +index 000000000..0e971b323 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */ ++ ++#define TYPE long long ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +new file mode 100644 +index 000000000..93216ebc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */ ++ ++#define TYPE short ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +new file mode 100644 +index 000000000..d05b86f47 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */ ++ ++#ifndef VLEN ++#define VLEN 16 ++#endif ++ ++#ifndef TYPE ++#define TYPE int ++#endif ++ 
++typedef unsigned TYPE V __attribute__ ((vector_size (VLEN))); ++V a, b, c; ++ ++void ++test (int x) ++{ ++ b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x); ++} ++ ++void ++test2 (void) ++{ ++ for (int i = 0; i < VLEN / sizeof (TYPE); i++) ++ ci = ai << bi | ai >> ((int)sizeof (TYPE) * __CHAR_BIT__ - bi); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
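The identity behind these expanders, as a worked scalar example (mine): for a power-of-two width W, rotl (x, n) equals rotr (x, -n mod W), so only the shift amount needs negating; in the vector-by-scalar case the negation happens once, before the broadcast.

unsigned
rotl32 (unsigned x, unsigned n)
{
  return x << (n & 31) | x >> (-n & 31);
}

unsigned
rotl32_via_rotr (unsigned x, unsigned n)
{
  unsigned m = -n & 31;             /* negated amount */
  return x >> m | x << (-m & 31);   /* rotr by m == rotl by n */
}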
View file
_service:tar_scm:0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
Added
@@ -0,0 +1,104 @@ +From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 27 Dec 2023 04:28:56 +0800 +Subject: PATCH 089/188 LoongArch: Fix infinite secondary reloading of + FCCmode PR113148 + +The GCC internal doc says: + + X might be a pseudo-register or a 'subreg' of a pseudo-register, + which could either be in a hard register or in memory. Use + 'true_regnum' to find out; it will return -1 if the pseudo is in + memory and the hard register number if it is in a register. + +So "MEM_P (x)" is not enough for checking if we are reloading from/to +the memory. This bug has caused reload pass to stall and finally ICE +complaining with "maximum number of generated reload insns per insn +achieved", since r14-6814. + +Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue. + +gcc/ChangeLog: + + PR target/113148 + * config/loongarch/loongarch.cc (loongarch_secondary_reload): + Check if regno == -1 besides MEM_P (x) for reloading FCCmode + from/to FPR to/from memory. + +gcc/testsuite/ChangeLog: + + PR target/113148 + * gcc.target/loongarch/pr113148.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 3 +- + gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++ + 2 files changed, 46 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5c278386a..2e305f940 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) ++ if (reg_class_subset_p (rclass, FP_REGS) ++ && (regno == -1 || MEM_P (x))) + return GR_REGS; + + return NO_REGS; +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c +new file mode 100644 +index 000000000..cf48e5520 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c +@@ -0,0 +1,44 @@ ++/* PR 113148: ICE caused by infinite reloading */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */ ++ ++struct bound ++{ ++ double max; ++} drawQuadrant_bound; ++double w4, innerXfromXY_y, computeBound_right_0; ++struct arc_def ++{ ++ double w, h; ++ double a0, a1; ++}; ++static void drawQuadrant (struct arc_def *); ++static void ++computeBound (struct arc_def *def, struct bound *bound) ++{ ++ double ellipsex_1, ellipsex_0; ++ bound->max = def->a1 ?: __builtin_sin (w4) * def->h; ++ if (def->a0 == 5 && def->w == def->h) ++ ; ++ else ++ ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4); ++ if (def->a1 == 5 && def->w == def->h) ++ ellipsex_1 = bound->max; ++ __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4); ++ computeBound_right_0 = ellipsex_0; ++} ++void ++drawArc () ++{ ++ struct arc_def foo; ++ for (;;) ++ drawQuadrant (&foo); ++} ++void ++drawQuadrant (struct arc_def *def) ++{ ++ int y, miny; ++ computeBound (def, &drawQuadrant_bound); ++ while (y >= miny) ++ ; ++} +-- +2.43.0 +
View file
_service:tar_scm:0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
Added
@@ -0,0 +1,305 @@ +From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 12 Dec 2023 04:54:21 +0800 +Subject: PATCH 090/188 LoongArch: Replace -mexplicit-relocs=auto simple-used + address peephole2 with combine + +The problem with peephole2 is it uses a naive sliding-window algorithm +and misses many cases. For example: + + float a10000; + float t() { return a0 + a8000; } + +is compiled to: + + la.local $r13,a + la.local $r12,a+32768 + fld.s $f1,$r13,0 + fld.s $f0,$r12,-768 + fadd.s $f0,$f1,$f0 + +by trunk. But as we've explained in r14-4851, the following would be +better with -mexplicit-relocs=auto: + + pcalau12i $r13,%pc_hi20(a) + pcalau12i $r12,%pc_hi20(a+32000) + fld.s $f1,$r13,%pc_lo12(a) + fld.s $f0,$r12,%pc_lo12(a+32000) + fadd.s $f0,$f1,$f0 + +However the sliding-window algorithm just won't detect the pcalau12i/fld +pair to be optimized. Use a define_insn_and_rewrite in combine pass +will work around the issue. + +gcc/ChangeLog: + + * config/loongarch/predicates.md + (symbolic_pcrel_offset_operand): New define_predicate. + (mem_simple_ldst_operand): Likewise. + * config/loongarch/loongarch-protos.h + (loongarch_rewrite_mem_for_simple_ldst): Declare. + * config/loongarch/loongarch.cc + (loongarch_rewrite_mem_for_simple_ldst): Implement. + * config/loongarch/loongarch.md (simple_load<mode>): New + define_insn_and_rewrite. + (simple_load_<su>ext<SUBDI:mode><GPR:mode>): Likewise. + (simple_store<mode>): Likewise. + (define_peephole2): Remove la.local/fld peepholes. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c: + New test. + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c: + New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 16 +++ + gcc/config/loongarch/loongarch.md | 114 +++++------------- + gcc/config/loongarch/predicates.md | 13 ++ + ...explicit-relocs-auto-single-load-store-2.c | 11 ++ + ...explicit-relocs-auto-single-load-store-3.c | 18 +++ + 6 files changed, 86 insertions(+), 87 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 2067e50c3..5060efbb6 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); + extern bool loongarch_pre_reload_split (void); + extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *); ++extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx); + + union loongarch_gen_fn_ptrs + { +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 2e305f940..c6318bee9 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + return 0; + } + ++/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto ++ -mcmodel={normal/medium}. 
*/ ++rtx ++loongarch_rewrite_mem_for_simple_ldst (rtx mem) ++{ ++ rtx addr = XEXP (mem, 0); ++ rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_PCALAU12I_GR); ++ rtx new_mem; ++ ++ addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr); ++ new_mem = gen_rtx_MEM (GET_MODE (mem), addr); ++ MEM_COPY_ATTRIBUTES (new_mem, mem); ++ return new_mem; ++} ++ + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed4d4b906..3c61a0cf4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4135,101 +4135,41 @@ + ;; + ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with + ;; 3 instructions). +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (match_dup 0))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) +- (match_operand 3 "const_int_operand")))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (any_extend:GPR (mem:SUBDI (match_dup 0)))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) ++(define_insn_and_rewrite "simple_load<mode>" ++ (set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") ++ (match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" "")) ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ operands1 = loongarch_rewrite_mem_for_simple_ldst (operands1); + }) + +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") ++(define_insn_and_rewrite "simple_load_<su>ext<SUBDI:mode><GPR:mode>" ++ (set (match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR +- (mem:SUBDI (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && 
(TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (match_dup 0)) +- (match_operand:ST_ANY 2 "register_operand")) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0)) \ +- && REGNO (operands0) != REGNO (operands2)"
View file
_service:tar_scm:0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
Added
@@ -0,0 +1,33 @@ +From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 29 Dec 2023 20:04:34 +0800 +Subject: PATCH 091/188 LoongArch: Fix the format of + bstrins_<mode>_for_ior_mask condition (NFC) + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (bstrins_<mode>_for_ior_mask): + For the condition, remove unneeded trailing "\" and move "&&" to + follow GNU coding style. NFC. +--- + gcc/config/loongarch/loongarch.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3c61a0cf4..996df66e8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1486,8 +1486,8 @@ + (match_operand:GPR 2 "const_int_operand")) + (and:GPR (match_operand:GPR 3 "register_operand") + (match_operand:GPR 4 "const_int_operand")))) +- "loongarch_pre_reload_split () && \ +- loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" ++ "loongarch_pre_reload_split () ++ && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" + "&& true" + (set (match_dup 0) (match_dup 1)) +-- +2.43.0 +
View file
_service:tar_scm:0092-LoongArch-Added-TLS-Le-Relax-support.patch
Added
@@ -0,0 +1,280 @@ +From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 12 Dec 2023 16:32:31 +0800 +Subject: PATCH 092/188 LoongArch: Added TLS Le Relax support. + +Check whether the assembler supports tls le relax. If it supports it, the assembly +instruction sequence of tls le relax will be generated by default. + +The original way to obtain the tls le symbol address: + lu12i.w $rd, %le_hi20(sym) + ori $rd, $rd, %le_lo12(sym) + add.{w/d} $rd, $rd, $tp + +If the assembler supports tls le relax, the following sequence is generated: + + lu12i.w $rd, %le_hi20_r(sym) + add.{w/d} $rd,$rd,$tp,%le_add_r(sym) + addi.{w/d} $rd,$rd,%le_lo12_r(sym) + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define. + * config/loongarch/loongarch.cc (loongarch_legitimize_tls_address): + Added TLS Le Relax support. + (loongarch_print_operand_reloc): Add the output string of TLS Le Relax. + * config/loongarch/loongarch.md (@add_tls_le_relax<mode>): New template. + * configure: Regenerate. + * configure.ac: Check if binutils supports TLS le relax. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp: Add a function to check whether binutil supports + TLS Le Relax. + * gcc.target/loongarch/tls-le-relax.c: New test. +--- + gcc/config.in | 6 +++ + gcc/config/loongarch/loongarch-opts.h | 4 ++ + gcc/config/loongarch/loongarch.cc | 46 +++++++++++++++++-- + gcc/config/loongarch/loongarch.md | 12 +++++ + gcc/configure | 31 +++++++++++++ + gcc/configure.ac | 5 ++ + .../gcc.target/loongarch/tls-le-relax.c | 12 +++++ + gcc/testsuite/lib/target-supports.exp | 12 +++++ + 8 files changed, 125 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c + +diff --git a/gcc/config.in b/gcc/config.in +index 033cfb98b..7220b2b2b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -771,6 +771,12 @@ + #endif + + ++/* Define if your assembler supports tls le relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_TLS_LE_RELAXATION ++#endif ++ ++ + /* Define if your assembler supports vl/vst/vlm/vstm with an optional + alignment hint argument. */ + #ifndef USED_FOR_TARGET +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 639ed50bd..8491bee0d 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -114,4 +114,8 @@ struct loongarch_flags { + #define HAVE_AS_TLS 0 + #endif + ++#ifndef HAVE_AS_TLS_LE_RELAXATION ++#define HAVE_AS_TLS_LE_RELAXATION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c6318bee9..d1b1950dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc) + + case TLS_MODEL_LOCAL_EXEC: + { +- /* la.tls.le; tp-relative add. */ ++ /* la.tls.le; tp-relative add. 
++ ++ normal: ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ add.{w/d} $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ tls le relax: ++ lu12i.w $rd, %le_hi20_r(sym) ++ add.{w/d} $rd,$rd,$tp ++ addi.{w/d} $rd,$rd,%le_lo12_r(sym) ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ extreme (When the code model is set to extreme, the TLS le Relax ++ instruction sequence is not generated): ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ lu32i.d $rd, %le64_lo20(sym) ++ lu52i.d $rd, $rd, %le64_hi12(sym) ++ add.d $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) */ ++ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc) + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +- emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); ++ ++ /* The assembler does not implement tls le relax support when the ++ code model is extreme, so when the code model is extreme, the ++ old symbol address acquisition method is still used. */ ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_add_tls_le_relax (Pmode, dest, high, ++ tp, loc)); ++ loongarch_emit_move (dest, ++ gen_rtx_LO_SUM (Pmode, dest, tmp2)); ++ return dest; ++ } ++ else ++ emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); + + if (TARGET_CMODEL_EXTREME) + { +@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + gcc_unreachable (); + } + else +- reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ { ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r"; ++ else ++ reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ } + break; + + case SYMBOL_TLSGD: +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 996df66e8..02c537d4c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -73,6 +73,7 @@ + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I + UNSPEC_PCALAU12I_GR ++ UNSPEC_ADD_TLS_LE_RELAX + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2503,6 +2504,17 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + (set_attr "type" "move")) + ++(define_insn "@add_tls_le_relax<mode>" ++ (set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P (match_operand:P 1 "register_operand" "r") ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "symbolic_operand") ++ UNSPEC_ADD_TLS_LE_RELAX)) ++ "HAVE_AS_TLS_LE_RELAXATION" ++ "add.<d>\t%0,%1,%2,%%le_add_r(%3)" ++ (set_attr "type" "move") ++) ++ + (define_insn "@ori_l_lo12<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P (match_operand:P 1 "register_operand" "r") +diff --git a/gcc/configure b/gcc/configure +index 5842e7a18..eecfe60d6 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then + + $as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for tls le relaxation support" >&5 ++$as_echo_n "checking assembler for tls le relaxation support... 
" >&6; } ++if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_tls_le_relaxation_support=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5
View file
_service:tar_scm:0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
Added
@@ -0,0 +1,112 @@ +From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 30 Dec 2023 21:40:11 +0800 +Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors + +We already had smin/smax RTL pattern using vfmin/vfmax instructions. +But for smin/smax, it's unspecified what will happen if either operand +contains any NaN operands. So we would not vectorize the loop with +-fno-finite-math-only (the default for all optimization levels except +-Ofast). + +But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we +can also use them and vectorize the loop. + +gcc/ChangeLog: + + * config/loongarch/simd.md (fmax<mode>3): New define_insn. + (fmin<mode>3): Likewise. + (reduc_fmax_scal_<mode>3): New define_expand. + (reduc_fmin_scal_<mode>3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfmax-vfmin.c: New test. +--- + gcc/config/loongarch/simd.md | 31 +++++++++++++++++++ + .../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++ + 2 files changed, 62 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 93fb39abc..8ac1d75a8 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -426,6 +426,37 @@ + (set_attr "type" "simd_fcmp") + (set_attr "mode" "<MODE>")]) + ++; xvf{min/max} instructions are IEEE-754-2008 conforming, use them for ++; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead ++; of smin/smax though, see PR105414 and PR107013. ++ ++(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN]) ++(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")]) ++ ++(define_insn "<fmaxmin><mode>3" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")] ++ UNSPEC_FMAXMIN))] ++ "" ++ "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2" ++ [(set_attr "type" "simd_fminmax") ++ (set_attr "mode" "<MODE>")]) ++ ++;; ... and also reduc operations. ++(define_expand "reduc_<fmaxmin>_scal_<mode>" ++ [(match_operand:<UNITMODE> 0 "register_operand") ++ (match_operand:FVEC 1 "register_operand") ++ (const_int UNSPEC_FMAXMIN)] ++ "" ++{ ++ rtx tmp = gen_reg_rtx (<MODE>mode); ++ loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]); ++ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. 
+ (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +new file mode 100644 +index 000000000..811fee361 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464 -mlasx" } */ ++/* { dg-final { scan-assembler "\tvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.s" } } */ ++ ++#define T(OP) __typeof__ (__builtin_##OP (0, 0)) ++ ++#define TEST(OP, LEN) \ ++void \ ++test_##OP##LEN (T (OP) *restrict dest, \ ++ const T (OP) *restrict src1, \ ++ const T (OP) *restrict src2) \ ++{ \ ++ for (int i = 0; i < LEN / sizeof (T(OP)); i++) \ ++ dest[i] = __builtin_##OP (src1[i], src2[i]); \ ++} ++ ++TEST(fmin, 16) ++TEST(fmax, 16) ++TEST(fmin, 32) ++TEST(fmax, 32) ++TEST(fminf, 16) ++TEST(fmaxf, 16) ++TEST(fminf, 32) ++TEST(fmaxf, 32) +-- +2.43.0 +
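Editor's note: the practical effect of this patch is that NaN-correct min/max loops can now be vectorized without -ffinite-math-only. A minimal sketch of ours (the patch's own test uses the macro form shown above):

#include <math.h>

/* With only smin/smax patterns this loop stays scalar unless NaNs are
   ruled out; with the IEEE-754-2008 vfmin pattern it can use
   vfmin.d/xvfmin.d.  */
void
vmin (double *restrict d, const double *restrict a,
      const double *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = fmin (a[i], b[i]);
}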
View file
_service:tar_scm:0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
Added
@@ -0,0 +1,1484 @@ +From 06a6a571fd557b53f805d990dd1a40a2ab7c1e5c Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 28 Dec 2023 20:26:46 +0800 +Subject: PATCH 094/188 LoongArch: Merge constant vector permuatation + implementations. + +There are currently two versions of the implementations of constant +vector permutation: loongarch_expand_vec_perm_const_1 and +loongarch_expand_vec_perm_const_2. The implementations of the two +versions are different. Currently, only the implementation of +loongarch_expand_vec_perm_const_1 is used for 256-bit vectors. We +hope to streamline the code as much as possible while retaining the +better-performing implementation of the two. By repeatedly testing +spec2006 and spec2017, we got the following Merged version. +Compared with the pre-merger version, the number of lines of code +in loongarch.cc has been reduced by 888 lines. At the same time, +the performance of SPECint2006 under Ofast has been improved by 0.97%, +and the performance of SPEC2017 fprate has been improved by 0.27%. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_is_odd_extraction): + Remove useless forward declaration. + (loongarch_is_even_extraction): Remove useless forward declaration. + (loongarch_try_expand_lsx_vshuf_const): Removed. + (loongarch_expand_vec_perm_const_1): Merged. + (loongarch_is_double_duplicate): Removed. + (loongarch_is_center_extraction): Ditto. + (loongarch_is_reversing_permutation): Ditto. + (loongarch_is_di_misalign_extract): Ditto. + (loongarch_is_si_misalign_extract): Ditto. + (loongarch_is_lasx_lowpart_extract): Ditto. + (loongarch_is_op_reverse_perm): Ditto. + (loongarch_is_single_op_perm): Ditto. + (loongarch_is_divisible_perm): Ditto. + (loongarch_is_triple_stride_extract): Ditto. + (loongarch_expand_vec_perm_const_2): Merged. + (loongarch_expand_vec_perm_const): New. + (loongarch_vectorize_vec_perm_const): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 1308 +++++------------------------ + 1 file changed, 210 insertions(+), 1098 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d1b1950dc..9d2374a46 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8823,143 +8823,6 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + } + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) +-{ +- int i; +- rtx target, op0, op1, sel, tmp; +- rtx rpermMAX_VECT_LEN; +- +- if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode +- || d->vmode == E_V4SImode || d->vmode == E_V4SFmode +- || d->vmode == E_V8HImode || d->vmode == E_V16QImode) +- { +- target = d->target; +- op0 = d->op0; +- op1 = d->one_vector_p ? d->op0 : d->op1; +- +- if (GET_MODE (op0) != GET_MODE (op1) +- || GET_MODE (op0) != GET_MODE (target)) +- return false; +- +- if (d->testing_p) +- return true; +- +- /* If match extract-even and extract-odd permutations pattern, use +- * vselect much better than vshuf. 
*/ +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) +- return true; +- +- unsigned char perm2MAX_VECT_LEN; +- for (i = 0; i < d->nelt; ++i) +- perm2i = (d->permi + d->nelt) & (2 * d->nelt - 1); +- +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) +- return true; +- } +- +- for (i = 0; i < d->nelt; i += 1) +- { +- rpermi = GEN_INT (d->permi); +- } +- +- if (d->vmode == E_V2DFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else if (d->vmode == E_V4SFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else +- { +- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); +- emit_move_insn (d->target, sel); +- } +- +- switch (d->vmode) +- { +- case E_V2DFmode: +- emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); +- break; +- case E_V2DImode: +- emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); +- break; +- case E_V4SFmode: +- emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); +- break; +- case E_V4SImode: +- emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); +- break; +- case E_V8HImode: +- emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); +- break; +- case E_V16QImode: +- emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); +- break; +- default: +- break; +- } +- +- return true; +- } +- return false; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2MAX_VECT_LEN; +- +- if (d->one_vector_p) +- { +- /* Try interleave with alternating operands. */ +- memcpy (perm2, d->perm, sizeof (perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2i += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, +- nelt)) +- return true; +- } +- else +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; +- +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2i = (d->permi + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, +- nelt)) +- return true; +- } +- +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- if (loongarch_expand_vec_perm_interleave (d)) +- return true; +- return false; +-} +- + /* Following are the assist function for const vector permutation support. */ + static bool + loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) +@@ -8991,36 +8854,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_double_duplicate (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p)
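Editor's note: the rest of this large patch is truncated in this view. For reference, constant permutations of the kind the merged loongarch_expand_vec_perm_const expander handles can be written portably with GCC's __builtin_shufflevector (available since GCC 12); a small sketch of ours, not taken from the patch:

typedef int v4si __attribute__ ((vector_size (16)));

/* Even-element extraction from the concatenation of a and b; the
   constant selector {0, 2, 4, 6} is expanded through the target's
   vec_perm_const hook.  */
v4si
even_extract (v4si a, v4si b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}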
View file
_service:tar_scm:0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
Added
@@ -0,0 +1,34 @@ +From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 29 Dec 2023 09:45:15 +0800 +Subject: PATCH 095/188 LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file. + +After implementing the cost model on the LoongArch architecture, the GCC +compiler code has this feature turned on by default, which causes the +lasx-xvstelm.c file test to fail. Through analysis, this test case can +generate vectorization instructions required for detection only after +disabling the functionality of the cost model with the "-fno-vect-cost-model" +compilation option. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvstelm.c:Add compile + option "-fno-vect-cost-model" to dg-options. +--- + gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +index 1a7b0e86f..4b846204a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mlasx" } */ ++/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */ + /* { dg-final { scan-assembler-times "xvstelm.w" 8} } */ + + #define LEN 256 +-- +2.43.0 +
View file
_service:tar_scm:0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
Added
@@ -0,0 +1,47 @@ +From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:24 +0800 +Subject: PATCH 096/188 LoongArch: testsuite:Modify the test behavior of the + vect-bic-bitmask-{12, 23}.c file. + +Before modifying the test behavior of the program, dg-do is set to assemble in +vect-bic-bitmask-{12,23}.c. However, when the binutils library does not support +the vector instruction set, it will FAIL to recognize the vector instruction +and fail item will appear in the assembly stage. So set the program's dg-do to +compile. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/vect-bic-bitmask-12.c: Change the default + setting of assembly to compile. + * gcc.dg/vect/vect-bic-bitmask-23.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +index 36ec5a8b1..213e4c2a4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */ + + #include <stdint.h> +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +index 5b4c3b6e1..5dceb4bbc 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */ + + #include <stdint.h> +-- +2.43.0 +
View file
_service:tar_scm:0097-Improve-non-loop-disambiguation.patch
Added
@@ -0,0 +1,101 @@ +From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 19:45:43 +0800 +Subject: [PATCH] Improve non-loop disambiguation + +This optimization is brought from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa. + +When dr_may_alias_p is called without a loop context, it tries +to use the tree-affine interface to calculate the difference +between the two addresses and use that difference to check whether +the gap between the accesses is known at compile time. However, as the +example in the PR shows, this doesn't expand SSA_NAMEs and so can easily +be defeated by things like reassociation. + +One fix would have been to use aff_combination_expand to expand the +SSA_NAMEs, but we'd then need some way of maintaining the associated +cache. This patch instead reuses the innermost_loop_behavior fields +(which exist even when no loop context is provided). + +It might still be useful to do the aff_combination_expand thing too, +if an example turns out to need it. +--- + gcc/common.opt | 4 ++++ + gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++ + gcc/tree-data-ref.cc | 22 +++++++++++++++++++++ + 3 files changed, 42 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..75bf9c9c1 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3217,6 +3217,10 @@ ftree-loop-vectorize + Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable loop vectorization on trees. + ++falias-analysis-expand-ssa ++Common Var(flag_alias_analysis_expand_ssa) Init(0) ++Enable expanded SSA name analysis during alias analysis. ++ + ftree-slp-vectorize + Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c +new file mode 100644 +index 000000000..5ff8a8a62 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-falias-analysis-expand-ssa" } */ ++ ++void f(double *p, long i) ++{ ++ p[i+0] += 1; ++ p[i+1] += 1; ++} ++void g(double *p, long i) ++{ ++ double *q = p + i; ++ q[0] += 1; ++ q[1] += 1; ++} ++ ++/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */ +diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc +index e6ae9e847..a05073c51 100644 +--- a/gcc/tree-data-ref.cc ++++ b/gcc/tree-data-ref.cc +@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, + disambiguation. 
*/ + if (!loop_nest) + { ++ if (flag_alias_analysis_expand_ssa) ++ { ++ tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a))); ++ tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b))); ++ ++ if (DR_BASE_ADDRESS (a) ++ && DR_BASE_ADDRESS (b) ++ && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b)) ++ && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b)) ++ && poly_int_tree_p (tree_size_a) ++ && poly_int_tree_p (tree_size_b) ++ && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)), ++ wi::to_widest (tree_size_a), ++ wi::to_widest (DR_INIT (b)), ++ wi::to_widest (tree_size_b))) ++ { ++ gcc_assert (integer_zerop (DR_STEP (a)) ++ && integer_zerop (DR_STEP (b))); ++ return false; ++ } ++ } ++ + aff_tree off1, off2; + poly_widest_int size1, size2; + get_inner_reference_aff (DR_REF (a), &off1, &size1); +-- +2.33.0 +
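Editor's note: the added early-out boils down to a constant interval-overlap test on DR_INIT and the access sizes once the base addresses and offsets compare equal. A scalar sketch of ours of that test (GCC's ranges_maybe_overlap_p performs the same check on poly_ints):

#include <stdbool.h>

/* Accesses of size1/size2 bytes at constant byte offsets init1/init2 from
   the same base can alias only if the half-open intervals
   [init1, init1+size1) and [init2, init2+size2) overlap.  */
static bool
intervals_overlap (long init1, long size1, long init2, long size2)
{
  return init1 < init2 + size2 && init2 < init1 + size1;
}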
View file
_service:tar_scm:0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
Added
@@ -0,0 +1,31 @@ +From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:27 +0800 +Subject: PATCH 097/188 LoongArch: testsuite:Delete the default run behavior + in pr60510.f. + +When binutils does not support vector instruction sets, the test program fails +because it does not recognize vectorization at the assembly stage. Therefore, +the default run behavior of the program is deleted, so that the behavior of +the program depends on whether the software supports vectorization. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/pr60510.f: Delete the default behavior of the + program. +--- + gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f +index ecd50dd55..c1e11b27d 100644 +--- a/gcc/testsuite/gfortran.dg/vect/pr60510.f ++++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f +@@ -1,4 +1,3 @@ +-! { dg-do run } + ! { dg-require-effective-target vect_double } + ! { dg-require-effective-target vect_intdouble_cvt } + ! { dg-additional-options "-fno-inline -ffast-math" } +-- +2.43.0 +
View file
_service:tar_scm:0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
Deleted
@@ -1,378 +0,0 @@ -From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:48 +0100 -Subject: PATCH aarch64: Use local frame vars in shrink-wrapping code - -aarch64_layout_frame uses a shorthand for referring to -cfun->machine->frame: - - aarch64_frame &frame = cfun->machine->frame; - -This patch does the same for some other heavy users of the structure. -No functional change intended. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use - a local shorthand for cfun->machine->frame. - (aarch64_restore_callee_saves, aarch64_get_separate_components): - (aarch64_process_components): Likewise. - (aarch64_allocate_and_probe_stack_space): Likewise. - (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. - (aarch64_layout_frame): Use existing shorthand for one more case. ---- - gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- - 1 file changed, 64 insertions(+), 59 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 226dc9dffd47..ae42ffdedbeb 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void) - frame.is_scs_enabled - = (!crtl->calls_eh_return - && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) -- && known_ge (cfun->machine->frame.reg_offsetLR_REGNUM, 0)); -+ && known_ge (frame.reg_offsetLR_REGNUM, 0)); - - /* When shadow call stack is enabled, the scs_pop in the epilogue will - restore x30, and we don't need to pop x30 again in the traditional -@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - unsigned start, unsigned limit, bool skip_wb, - bool hard_fp_valid_p) - { -+ aarch64_frame &frame = cfun->machine->frame; - rtx_insn *insn; - unsigned regno; - unsigned regno2; -@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, - bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - - if (skip_wb -- && (regno == cfun->machine->frame.wb_push_candidate1 -- || regno == cfun->machine->frame.wb_push_candidate2)) -+ && (regno == frame.wb_push_candidate1 -+ || regno == frame.wb_push_candidate2)) - continue; - - if (cfun->machine->reg_is_wrapped_separatelyregno) -@@ -8786,7 +8787,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + cfun->machine->frame.reg_offsetregno; -+ offset = start_offset + frame.reg_offsetregno; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - { - gcc_assert (known_eq (start_offset, 0)); - poly_int64 fp_offset -- = cfun->machine->frame.below_hard_fp_saved_regs_size; -+ = frame.below_hard_fp_saved_regs_size; - if (hard_fp_valid_p) - base_rtx = hard_frame_pointer_rtx; - else -@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separatelyregno2 - && known_eq (GET_MODE_SIZE (mode), -- cfun->machine->frame.reg_offsetregno2 -- - cfun->machine->frame.reg_offsetregno)) -+ frame.reg_offsetregno2 - frame.reg_offsetregno)) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; -@@ -8872,6 +8872,7 @@ static void - aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - unsigned limit, bool skip_wb, rtx *cfi_ops) - { -+ 
aarch64_frame &frame = cfun->machine->frame; - unsigned regno; - unsigned regno2; - poly_int64 offset; -@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - rtx reg, mem; - - if (skip_wb -- && (regno == cfun->machine->frame.wb_pop_candidate1 -- || regno == cfun->machine->frame.wb_pop_candidate2)) -+ && (regno == frame.wb_pop_candidate1 -+ || regno == frame.wb_pop_candidate2)) - continue; - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + cfun->machine->frame.reg_offsetregno; -+ offset = start_offset + frame.reg_offsetregno; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separatelyregno2 - && known_eq (GET_MODE_SIZE (mode), -- cfun->machine->frame.reg_offsetregno2 -- - cfun->machine->frame.reg_offsetregno)) -+ frame.reg_offsetregno2 - frame.reg_offsetregno)) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; -@@ -9011,6 +9011,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) - static sbitmap - aarch64_get_separate_components (void) - { -+ aarch64_frame &frame = cfun->machine->frame; - sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); - bitmap_clear (components); - -@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void) - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - continue; - -- poly_int64 offset = cfun->machine->frame.reg_offsetregno; -+ poly_int64 offset = frame.reg_offsetregno; - - /* If the register is saved in the first SVE save slot, we use - it as a stack probe for -fstack-clash-protection. */ - if (flag_stack_clash_protection -- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) -+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) - && known_eq (offset, 0)) - continue; - - /* Get the offset relative to the register we'll use. */ - if (frame_pointer_needed) -- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ offset -= frame.below_hard_fp_saved_regs_size; - else - offset += crtl->outgoing_args_size; - -@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void) - /* If the spare predicate register used by big-endian SVE code - is call-preserved, it must be saved in the main prologue - before any saves that use it. */ -- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) -- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); -+ if (frame.spare_pred_reg != INVALID_REGNUM) -+ bitmap_clear_bit (components, frame.spare_pred_reg); - -- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; -- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; -+ unsigned reg1 = frame.wb_push_candidate1; -+ unsigned reg2 = frame.wb_push_candidate2; - /* If registers have been chosen to be stored/restored with - writeback don't interfere with them to avoid having to output explicit - stack adjustment instructions. */ -@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) - static void - aarch64_process_components (sbitmap components, bool prologue_p) - { -+ aarch64_frame &frame = cfun->machine->frame; - rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed - ? 
HARD_FRAME_POINTER_REGNUM - : STACK_POINTER_REGNUM); -@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) - machine_mode mode = aarch64_reg_save_mode (regno); - - rtx reg = gen_rtx_REG (mode, regno); -- poly_int64 offset = cfun->machine->frame.reg_offsetregno; -+ poly_int64 offset = frame.reg_offsetregno; - if (frame_pointer_needed) -- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ offset -= frame.below_hard_fp_saved_regs_size; - else - offset += crtl->outgoing_args_size; - -@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) - break; - } - -- poly_int64 offset2 = cfun->machine->frame.reg_offsetregno2; -+ poly_int64 offset2 = frame.reg_offsetregno2; - /* The next register is not of the same class or its offset is not - mergeable with the current one into a pair. */ - if (aarch64_sve_mode_p (mode) - || !satisfies_constraint_Ump (mem) - || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) - || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) -- || maybe_ne ((offset2 - cfun->machine->frame.reg_offsetregno),
View file
_service:tar_scm:0098-CHREC-multiplication-and-undefined-overflow.patch
Added
@@ -0,0 +1,261 @@ +From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 20:16:52 +0800 +Subject: [PATCH] CHREC multiplication and undefined overflow + +This optimization is brought from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html + +When folding a multiply CHRECs are handled like {a, +, b} * c +is {a*c, +, b*c} but that isn't generally correct when overflow +invokes undefined behavior. The following uses unsigned arithmetic +unless either a is zero or a and b have the same sign. + +I've used simple early outs for INTEGER_CSTs and otherwise use +a range-query since we lack a tree_expr_nonpositive_p and +get_range_pos_neg isn't a good fit. +--- + gcc/common.opt | 4 ++ + gcc/testsuite/gcc.dg/pr68317.c | 6 +- + gcc/testsuite/gcc.dg/torture/pr114074.c | 27 ++++++++++ + gcc/tree-chrec.cc | 81 +++++++++++++++++++++---- + gcc/tree-chrec.h | 2 +- + gcc/value-range.cc | 12 ++++ + gcc/value-range.h | 2 + + 7 files changed, 119 insertions(+), 15 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..d3af3ba39 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1771,6 +1771,10 @@ floop-interchange + Common Var(flag_loop_interchange) Optimization + Enable loop interchange on trees. + ++fchrec-mul-fold-strict-overflow ++Common Var(flag_chrec_mul_fold_strict_overflow) Init(0) ++Enable strict overflow handling during constant folding of multiply CHRECs. ++ + floop-block + Common Alias(floop-nest-optimize) + Enable loop nest transforms. Same as -floop-nest-optimize. +diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c +index bd053a752..671a67d95 100644 +--- a/gcc/testsuite/gcc.dg/pr68317.c ++++ b/gcc/testsuite/gcc.dg/pr68317.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdisable-tree-ethread" } */ ++/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */ + + /* Note: Threader will collapse loop. */ + +@@ -12,8 +12,8 @@ foo () + { + int32_t index = 0; + +- for (index; index <= 10; index--) // expected warning here ++ for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */ + /* Result of the following multiply will overflow + when converted to signed int32_t. */ +- bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */ ++ bar ((0xcafe + index) * 0xdead); +diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c +new file mode 100644 +index 000000000..9a383d8fc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr114074.c +@@ -0,0 +1,27 @@ ++/* { dg-do run } */ ++/* { dg-options "-fchrec-mul-fold-strict-overflow" } */ ++int a, b, d; ++ ++__attribute__((noipa)) void ++foo (void) ++{ ++ ++d; ++} ++ ++int ++main () ++{ ++ for (a = 0; a > -3; a -= 2) ++ { ++ int c = a; ++ b = __INT_MAX__ - 3000; ++ a = ~c * b; ++ foo (); ++ if (!a) ++ break; ++ a = c; ++ } ++ if (d != 2) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc +index c44cea754..3323901bc 100644 +--- a/gcc/tree-chrec.cc ++++ b/gcc/tree-chrec.cc +@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3. 
If not see + #include "gimple.h" + #include "tree-ssa-loop.h" + #include "dumpfile.h" ++#include "value-range.h" ++#include "value-query.h" + #include "tree-scalar-evolution.h" + + /* Extended folder for chrecs. */ +@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type, + || automatically_generated_chrec_p (op1)) + return chrec_fold_automatically_generated_operands (op0, op1); + ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ if (TREE_CODE (op0) != POLYNOMIAL_CHREC ++ && TREE_CODE (op1) == POLYNOMIAL_CHREC) ++ std::swap (op0, op1); ++ } ++ + switch (TREE_CODE (op0)) + { + case POLYNOMIAL_CHREC: +@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type, + if (integer_zerop (op1)) + return build_int_cst (type, 0); + +- return build_polynomial_chrec +- (CHREC_VARIABLE (op0), +- chrec_fold_multiply (type, CHREC_LEFT (op0), op1), +- chrec_fold_multiply (type, CHREC_RIGHT (op0), op1)); ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the ++ same sign or CHREC_LEFT is zero then folding the multiply into ++ the addition does not have the same behavior on overflow. Use ++ unsigned arithmetic in that case. */ ++ value_range rl, rr; ++ if (!ANY_INTEGRAL_TYPE_P (type) ++ || TYPE_OVERFLOW_WRAPS (type) ++ || integer_zerop (CHREC_LEFT (op0)) ++ || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST ++ && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST ++ && (tree_int_cst_sgn (CHREC_LEFT (op0)) ++ == tree_int_cst_sgn (CHREC_RIGHT (op0)))) ++ || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0)) ++ && !rl.undefined_p () ++ && (rl.nonpositive_p () || rl.nonnegative_p ()) ++ && get_range_query (cfun)->range_of_expr (rr, ++ CHREC_RIGHT (op0)) ++ && !rr.undefined_p () ++ && ((rl.nonpositive_p () && rr.nonpositive_p ()) ++ || (rl.nonnegative_p () && rr.nonnegative_p ())))) ++ { ++ tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1); ++ tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1); ++ return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right); ++ } ++ else ++ { ++ tree utype = unsigned_type_for (type); ++ tree uop1 = chrec_convert_rhs (utype, op1); ++ tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0)); ++ tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0)); ++ tree left = chrec_fold_multiply (utype, uleft0, uop1); ++ tree right = chrec_fold_multiply (utype, uright0, uop1); ++ tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0), ++ left, right); ++ return chrec_convert_rhs (type, tem); ++ } ++ } ++ else ++ { ++ return build_polynomial_chrec ++ (CHREC_VARIABLE (op0), ++ chrec_fold_multiply (type, CHREC_LEFT (op0), op1), ++ chrec_fold_multiply (type, CHREC_RIGHT (op0), op1)); ++ } + } + + CASE_CONVERT: +@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type, + switch (TREE_CODE (op1)) + { + case POLYNOMIAL_CHREC: +- gcc_checking_assert +- (!chrec_contains_symbols_defined_in_loop (op1, +- CHREC_VARIABLE (op1))); +- return build_polynomial_chrec +- (CHREC_VARIABLE (op1), +- chrec_fold_multiply (type, CHREC_LEFT (op1), op0), +- chrec_fold_multiply (type, CHREC_RIGHT (op1), op0)); ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ gcc_unreachable ();
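Editor's note: this patch is truncated above. The unsigned fallback it adds folds {a, +, b} * c as {a*c, +, b*c} in the corresponding unsigned type, so a transient overflow wraps instead of invoking undefined behavior, and the result is converted back afterwards. A scalar sketch of ours of that idea (not the tree-level code):

/* Value of ({a, +, b} * c) at iteration n, i.e. (a + n*b) * c, evaluated
   through the folded recurrence {a*c, +, b*c} in unsigned arithmetic.  */
int
chrec_mul_at (int n, int a, int b, int c)
{
  unsigned acc  = (unsigned) a * (unsigned) c;
  unsigned step = (unsigned) b * (unsigned) c;  /* may wrap, never UB */
  for (int i = 0; i < n; i++)
    acc += step;
  return (int) acc;  /* conversion back is implementation-defined, not UB */
}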
View file
_service:tar_scm:0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
Added
@@ -0,0 +1,157 @@ +From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:28 +0800 +Subject: PATCH 098/188 LoongArch: testsuite:Added additional vectorization + "-mlasx" compilation option. + +In the LoongArch architecture, the reason for not adding the 128-bit +vector-width-*hi* instruction template in the GCC back end is that it causes +program performance loss, so we can only add the "-mlasx" compilation option +to use 256-bit vectorization functions in test files. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the + LoongArch architecture, you need to add the "-mlasx" compilation + option to generate vectorized code. + * gcc.dg/vect/slp-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-s16.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-half-u8.c: Dito. + * gcc.dg/vect/vect-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-u32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c | 1 + + gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c | 1 + + 10 files changed, 10 insertions(+) + +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +index 47b1a4366..52ffca82a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +index e3bfee333..cd44e551f 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +index 4c95dd201..082c758cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +index 4075f815c..a95e617ad 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +index c4ac88e18..14d96645a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +index ebbf4f5e8..7901dae85 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +index 2e28baae0..21b39953e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +index d277f0b2b..4827e11b2 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +index f50358802..87eb9e0cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +index 03d137941..507d30c35 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +-- +2.43.0 +
View file
_service:tar_scm:0098-aarch64-Avoid-a-use-of-callee-offset.patch
Deleted
@@ -1,73 +0,0 @@ -From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Avoid a use of callee_offset - -When we emit the frame chain, i.e. when we reach Here in this statement -of aarch64_expand_prologue: - - if (emit_frame_chain) - { - // Here - ... - } - -the stack is in one of two states: - -- We've allocated up to the frame chain, but no more. - -- We've allocated the whole frame, and the frame chain is within easy - reach of the new SP. - -The offset of the frame chain from the current SP is available -in aarch64_frame as callee_offset. It is also available as the -chain_offset local variable, where the latter is calculated from other -data. (However, chain_offset is not always equal to callee_offset when -!emit_frame_chain, so chain_offset isn't redundant.) - -In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using -chain_offset for the initialisation of the hard frame pointer: - - aarch64_add_offset (Pmode, hard_frame_pointer_rtx, -- stack_pointer_rtx, callee_offset, -+ stack_pointer_rtx, chain_offset, - tmp1_rtx, tmp0_rtx, frame_pointer_needed); - -But the later REG_CFA_ADJUST_CFA handling still used callee_offset. - -I think the difference is harmless, but it's more logical for the -CFA note to be in sync, and it's more convenient for later patches -if it uses chain_offset. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use - chain_offset rather than callee_offset. ---- - gcc/config/aarch64/aarch64.cc | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ae42ffdedbeb..79253322fd7c 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void) - poly_int64 initial_adjust = frame.initial_adjust; - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; -- poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; - poly_int64 below_hard_fp_saved_regs_size - = frame.below_hard_fp_saved_regs_size; -@@ -9783,8 +9782,7 @@ aarch64_expand_prologue (void) - implicit. */ - if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) - { -- rtx src = plus_constant (Pmode, stack_pointer_rtx, -- callee_offset); -+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); - add_reg_note (insn, REG_CFA_ADJUST_CFA, - gen_rtx_SET (hard_frame_pointer_rtx, src)); - } --- -2.43.5 -
View file
_service:tar_scm:0099-Enable-Transposed-SLP.patch
Added
@@ -0,0 +1,5624 @@ +From 0dd3b8532f35486bd5db2c71342c8dfed4c0893a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 17:25:23 +0800 +Subject: PATCH Enable Transposed SLP. + +--- + gcc/common.opt | 4 + + gcc/testsuite/gcc.dg/vect/transpose-1.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-2.c | 50 + + gcc/testsuite/gcc.dg/vect/transpose-3.c | 54 + + gcc/testsuite/gcc.dg/vect/transpose-4.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-5.c | 74 ++ + gcc/testsuite/gcc.dg/vect/transpose-6.c | 67 + + gcc/testsuite/gcc.dg/vect/transpose-7.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-8.c | 53 + + gcc/testsuite/gcc.dg/vect/vect.exp | 7 + + gcc/tree-loop-distribution.cc | 1464 ++++++++++++++++++++- + gcc/tree-vect-data-refs.cc | 237 ++++ + gcc/tree-vect-loop.cc | 42 +- + gcc/tree-vect-patterns.cc | 4 +- + gcc/tree-vect-slp.cc | 1553 ++++++++++++++++++++--- + gcc/tree-vect-stmts.cc | 973 +++++++++++++- + gcc/tree-vectorizer.h | 96 +- + 17 files changed, 4648 insertions(+), 189 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-1.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-2.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-3.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-4.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-5.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-6.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-7.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-8.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..5958c4e0b 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3221,6 +3221,10 @@ ftree-slp-vectorize + Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. + ++ftree-slp-transpose-vectorize ++Common Var(flag_tree_slp_transpose_vectorize) Optimization Init(0) ++Enable basic block vectorization (SLP) for transposed stores and loads on trees. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=unlimited|dynamic|cheap|very-cheap Specifies the cost model for vectorization. 
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c +new file mode 100644 +index 000000000..8237a8b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c @@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 16; ++ int i2 = 8; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 2; ++ input2[i] = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1264) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c +new file mode 100644 +index 000000000..fdf4dbd96 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c @@ -0,0 +1,50 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 8 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned short c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 5; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 4; ++ input2[i] = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1440) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c +new file mode 100644 +index 000000000..e492e3717 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c @@ -0,0 +1,54 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse -fno-tree-fre" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ }
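All of the new transpose-N.c tests follow the shape visible above: a group of strided loads per row, element-wise differences stored row by row, then a reduction over the stored rows. A condensed sketch of that pattern for readers who want to experiment (illustrative only; the flag and the slp1 dump name come from this patch's common.opt hunk and dg-final lines):

```c
/* Condensed form of the transpose-N.c kernels: grouped strided loads
   whose differences are stored row-by-row, then re-read and reduced.
   Sketch only; e.g. gcc -O3 -ftree-slp-transpose-vectorize
   -fdump-tree-slp1 t.c  */
#define N 4

int
sum_diffs (unsigned char *pix1, int stride1, unsigned char *pix2, int stride2)
{
  unsigned c[N][N];
  for (int i = 0; i < N; i++, pix1 += stride1, pix2 += stride2)
    for (int j = 0; j < N; j++)
      c[i][j] = pix1[j] - pix2[j];	/* transposed group of stores */

  int sum = 0;
  for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
      sum += c[i][j];
  return sum;
}
```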
View file
_service:tar_scm:0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
Added
@@ -0,0 +1,80 @@ +From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:29 +0800 +Subject: PATCH 099/188 LoongArch: testsuite:Give up the detection of the + gcc.dg/fma-{3, 4, 6, 7}.c file. + +On the LoongArch architecture, the above four test cases need to be waived +during testing. There are two situations: + +1. The function of fma-{3,6}.c test is to find the value of c-a*b, but on +the LoongArch architecture, the function of the existing fnmsub instruction +is to find the value of -(a*b - c); + +2. The function of fma-{4,7}.c test is to find the value of -(a*b)-c, but on +the LoongArch architecture, the function of the existing fnmadd instruction +is to find the value of -(a*b + c); + +Through the analysis of the above two cases, there will be positive and +negative zero inequality. + +gcc/testsuite/ChangeLog + + * gcc.dg/fma-3.c: The intermediate file corresponding to the + function does not produce the corresponding FNMA symbol, so the test + rules should be skipped when testing. + * gcc.dg/fma-4.c: The intermediate file corresponding to the + function does not produce the corresponding FNMS symbol, so skip the + test rules when testing. + * gcc.dg/fma-6.c: The cause is the same as fma-3.c. + * gcc.dg/fma-7.c: The cause is the same as fma-4.c +--- + gcc/testsuite/gcc.dg/fma-3.c | 2 +- + gcc/testsuite/gcc.dg/fma-4.c | 2 +- + gcc/testsuite/gcc.dg/fma-6.c | 2 +- + gcc/testsuite/gcc.dg/fma-7.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c +index 699aa2c95..6649b54b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-3.c ++++ b/gcc/testsuite/gcc.dg/fma-3.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return c - a * b; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c +index bff928f1f..f1701c196 100644 +--- a/gcc/testsuite/gcc.dg/fma-4.c ++++ b/gcc/testsuite/gcc.dg/fma-4.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return -(a * b) - c; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c +index 87258cec4..9e49b62b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-6.c ++++ b/gcc/testsuite/gcc.dg/fma-6.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, -c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c +index f409cc8ee..86aacad7b 100644 +--- a/gcc/testsuite/gcc.dg/fma-7.c ++++ b/gcc/testsuite/gcc.dg/fma-7.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! 
loongarch*-*-* } } } } } */ +-- +2.43.0 +
View file
_service:tar_scm:0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
Deleted
@@ -1,48 +0,0 @@ -From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Explicitly handle frames with no saved registers - -If a frame has no saved registers, it can be allocated in one go. -There is no need to treat the areas below and above the saved -registers as separate. - -And if we allocate the frame in one go, it should be allocated -as the initial_adjust rather than the final_adjust. This allows the -frame size to grow to guard_size - guard_used_by_caller before a stack -probe is needed. (A frame with no register saves is necessarily a -leaf frame.) - -This is a no-op as thing stand, since a leaf function will have -no outgoing arguments, and so all the frame will be above where -the saved registers normally go. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly - allocate the frame in one go if there are no saved registers. ---- - gcc/config/aarch64/aarch64.cc | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 79253322fd7c..e1f21230c15e 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; -- if (frame.frame_size.is_constant (&const_size) -- && const_size < max_push_offset -- && known_eq (frame.hard_fp_offset, const_size)) -+ if (known_eq (frame.saved_regs_size, 0)) -+ frame.initial_adjust = frame.frame_size; -+ else if (frame.frame_size.is_constant (&const_size) -+ && const_size < max_push_offset -+ && known_eq (frame.hard_fp_offset, const_size)) - { - /* Simple, small frame with no outgoing arguments: - --- -2.43.5 -
View file
_service:tar_scm:0100-Add-hip09-machine-discribtion.patch
Added
@@ -0,0 +1,882 @@ +From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001 +From: xingyushuai <xingyushuai@huawei.com> +Date: Fri, 3 Mar 2023 09:31:04 +0800 +Subject: PATCH 001/157 Add hip09 machine discribtion + +Here is the patch introducing hip09 machine model +for the scheduler. +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 109 +++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++ + 6 files changed, 774 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..a854bdb24 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 48522606f..fc5a3cbe4 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. 
*/ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table ampere1_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 9dc9adc70..238bb6e31 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..e9b3980c4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct 
cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost neoversen2_regmove_cost = + { + 1, /* GP2GP */
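A note on reading the hip09 cost tables above: every entry is expressed in COSTS_N_INSNS units, i.e. relative instruction counts rather than cycles. A toy illustration using the SImode entries shown; the macro body mirrors gcc/rtl.h:

```c
/* COSTS_N_INSNS (n) in GCC's rtl.h expands to n * 4, meaning "n average
   instructions".  With the hip09 SImode entries above, a simple multiply
   costs 2 instructions and an integer divide 11, so the RTL expanders
   will gladly trade a division by a constant for multiply-plus-shift.  */
#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)	/* mirrors gcc/rtl.h */

int
main (void)
{
  printf ("mult=%d idiv=%d\n", COSTS_N_INSNS (2), COSTS_N_INSNS (11));
  return 0;
}
```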
View file
_service:tar_scm:0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
Added
@@ -0,0 +1,206 @@ +From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jan 2024 10:37:53 +0800 +Subject: PATCH 100/188 LoongArch: Fixed the problem of incorrect judgment of + the immediate field of the xvld/xvst instruction. + +The xvld/xvst directive is defined as follows: + xvld/xvst {x/v}d, rj, si12 + +When not modified, the immediate field of xvld/xvst is between 10 and +14 bits depending on the type. However, in loongarch_valid_offset_p, the +immediate field is restricted first, so there is no error. However, in +some cases redundant instructions will be generated, see test cases. +Now modify it according to the description in the instruction manual. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_mxld_<lasxfmt_f>): + Modify the method of determining the memory offset of xvld/xvst. + (lasx_mxst_<lasxfmt_f>): Likewise. + * config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete. + (loongarch_address_insns): Likewise. + * config/loongarch/lsx.md (lsx_ld_<lsxfmt_f>): Likewise. + (lsx_st_<lsxfmt_f>): Likewise. + * config/loongarch/predicates.md (aq10b_operand): Likewise. + (aq10h_operand): Likewise. + (aq10w_operand): Likewise. + (aq10d_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ld-st-imm12.c: New test. +--- + gcc/config/loongarch/lasx.md | 26 ------------------- + gcc/config/loongarch/loongarch.cc | 19 +++----------- + gcc/config/loongarch/lsx.md | 26 ------------------- + gcc/config/loongarch/predicates.md | 16 ------------ + .../gcc.target/loongarch/vect-ld-st-imm12.c | 15 +++++++++++ + 5 files changed, 19 insertions(+), 83 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index dbbf5a136..95c6bae20 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -846,32 +846,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lasx_mxld_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lasx_mxst_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; LASX + (define_insn "add<mode>3" + [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9d2374a46..ddb32cea2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + + /* We may need to split multiword moves, so make sure that every word + is accessible. */ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + +- /* LSX LD.* and ST.* supports 10-bit signed offsets. 
*/ +- if (LSX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, +- loongarch_ldst_scaled_shift (mode))) +- return false; +- +- /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift. */ +- if (LASX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, 0)) +- return false; +- + return true; + } + +@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + case ADDRESS_REG: + if (lsx_p) + { +- /* LSX LD.* and ST.* supports 10-bit signed offsets. */ +- if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10, +- loongarch_ldst_scaled_shift (mode))) ++ /* LSX LD.* and ST.* supports 12-bit signed offsets. */ ++ if (IMM12_OPERAND (INTVAL (addr.offset))) + return 1; + else + return 0; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 3e3248ef4..02e89247b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -812,32 +812,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lsx_ld_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lsx_st_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; Integer operations + (define_insn "add<mode>3" + [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 3698b9103..824a85b36 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -167,22 +167,6 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +-(define_predicate "aq10b_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) +- +-(define_predicate "aq10h_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) +- +-(define_predicate "aq10w_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) +- +-(define_predicate "aq10d_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) +- + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +new file mode 100644 +index 000000000..bfc208e4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */ ++/* { dg-final { scan-assembler-not "addi.d" } } */ ++ ++extern short a[1000]; ++extern short b[1000]; ++extern short c[1000]; ++ ++void ++test (void) ++{
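The point of the new test is that a valid si12 offset no longer forces a separate addi.d. The relaxed check, re-implemented standalone for illustration (GCC's real test is the IMM12_OPERAND macro used in the hunks above):

```c
/* xvld/xvst accept a raw 12-bit signed byte offset, i.e. [-2048, 2047],
   with no scaling by the access size.  The old code applied the 10-bit
   scaled LSX/LASX range here, rejecting valid offsets and emitting an
   extra addi.d to materialize the address.  Illustration only.  */
#include <stdbool.h>
#include <stdio.h>

static bool
si12_offset_ok (long off)
{
  return off >= -2048 && off <= 2047;
}

int
main (void)
{
  printf ("%d %d %d\n", si12_offset_ok (2040), si12_offset_ok (-2048),
	  si12_offset_ok (2048));	/* prints: 1 1 0 */
  return 0;
}
```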
View file
_service:tar_scm:0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
Deleted
@@ -1,233 +0,0 @@ -From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Add bytes_below_saved_regs to frame info - -The frame layout code currently hard-codes the assumption that -the number of bytes below the saved registers is equal to the -size of the outgoing arguments. This patch abstracts that -value into a new field of aarch64_frame. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, - and use it instead of crtl->outgoing_args_size. - (aarch64_get_separate_components): Use bytes_below_saved_regs instead - of outgoing_args_size. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- - gcc/config/aarch64/aarch64.h | 5 +++ - 2 files changed, 41 insertions(+), 35 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e1f21230c15e..94e1b6865849 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); - -+ frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ - /* Now assign stack slots for the registers. Start with the predicate - registers, since predicate LDR and STR have a relatively small - offset range. These saves happen below the hard frame pointer. */ -@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void) - - poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; - -- poly_int64 above_outgoing_args -+ poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - - frame.hard_fp_offset -- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; -+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -- gcc_assert (multiple_p (crtl->outgoing_args_size, -+ gcc_assert (multiple_p (frame.bytes_below_saved_regs, - STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; -+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; - - frame.locals_offset = frame.saved_varargs_size; - -@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; -@@ -8384,31 +8386,31 @@ - && const_size < max_push_offset - && known_eq (frame.hard_fp_offset, const_size)) - { -- /* Simple, small frame with no outgoing arguments: -+ /* Simple, small frame with no data below the saved registers. - - stp reg1, reg2, [sp, -frame_size]! 
- stp reg3, reg4, [sp, 16] */ - frame.callee_adjust = const_size; - } -- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) -+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) - && frame.saved_regs_size.is_constant (&const_saved_regs_size) -- && const_outgoing_args_size + const_saved_regs_size < 512 -- /* We could handle this case even with outgoing args, provided -- that the number of args left us with valid offsets for all -- predicate and vector save slots. It's such a rare case that -- it hardly seems worth the effort though. */ -- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) -+ && const_below_saved_regs + const_saved_regs_size < 512 -+ /* We could handle this case even with data below the saved -+ registers, provided that that data left us with valid offsets -+ for all predicate and vector save slots. It's such a rare -+ case that it hardly seems worth the effort though. */ -+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca - && frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset)) - { -- /* Frame with small outgoing arguments: -+ /* Frame with small area below the saved registers: - - sub sp, sp, frame_size -- stp reg1, reg2, [sp, outgoing_args_size] -- stp reg3, reg4, [sp, outgoing_args_size + 16] */ -+ stp reg1, reg2, [sp, bytes_below_saved_regs] -+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_outgoing_args_size; -+ frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8418,30 +8420,29 @@ - - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = (frame.hard_fp_offset - + frame.below_hard_fp_saved_regs_size); -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset) - { -- /* Frame with large outgoing arguments or SVE saves, but with -- a small local area: -+ /* Frame with large area below the saved registers, or with SVE saves, -+ but with a small area above: - - stp reg1, reg2, [sp, -hard_fp_offset]! 
- stp reg3, reg4, [sp, 16] - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else - { -- /* Frame with large local area and outgoing arguments or SVE saves, -- using frame pointer: -+ /* General case: - - sub sp, sp, hard_fp_offset - stp x29, x30, [sp, 0] -@@ -8449,10 +8450,10 @@ - stp reg3, reg4, [sp, 16] - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.hard_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - - /* Make sure the individual adjustments add up to the full frame size. */ -@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset2 -= frame.below_hard_fp_saved_regs_size; - else -- offset2 += crtl->outgoing_args_size; -+ offset2 += frame.bytes_below_saved_regs; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) - registers. If POLY_SIZE is not large enough to require a probe this function - will only adjust the stack. When allocating the stack space - FRAME_RELATED_P is then used to indicate if the allocation is frame related. -- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing -- arguments. If we are then we ensure that any allocation larger than the ABI -- defined buffer needs a probe so that the invariant of having a 1KB buffer is -- maintained. -+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
View file
_service:tar_scm:0101-Add-hip11-CPU-pipeline-scheduling.patch
Added
@@ -0,0 +1,755 @@ +From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001 +From: XingYuShuai <1150775134@qq.com> +Date: Mon, 26 Feb 2024 20:34:06 +0800 +Subject: PATCH 002/157 Add hip11 CPU pipeline scheduling + +This patch adds an mcpu: hip11. It has been tested on aarch64 +and no regressions from this patch. +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 108 ++++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++ + gcc/doc/invoke.texi | 2 +- + 7 files changed, 634 insertions(+), 2 deletions(-) + create mode 100644 gcc/config/aarch64/hip11.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index a854bdb24..601b72abb 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index fc5a3cbe4..0ee427b61 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs = + } + }; + ++const struct cpu_cost_table hip11_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. 
*/ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 238bb6e31..511422081 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) 
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e9b3980c4..7c62ddb2a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip11_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip11_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + {
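Once aarch64-cores.def and aarch64-tune.md carry the hip11 entry, the core is selected like any other AArch64 CPU. A hypothetical smoke test, not from the patch:

```c
/* After this patch the driver should accept the new core name, and the
   hip11.md pipeline model then drives instruction scheduling:
     gcc -mcpu=hip11 -O2 -S smoke.c
     gcc -mtune=hip11 -O2 -S smoke.c   (tuning only, default ISA)  */
int
smoke (int a, int b)
{
  return a * b + a;	/* candidate for a scheduled madd */
}
```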
View file
_service:tar_scm:0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch
Added
@@ -0,0 +1,150 @@ +From f5355c67104cb5d150e1fd3b58807b2ad4e67b7c Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 5 Jan 2024 15:37:13 +0800 +Subject: PATCH 101/188 LoongArch: Improve lasx_xvpermi_q_<LASX:mode> insn + pattern + +For instruction xvpermi.q, unused bits in operands[3] need be set to 0 to avoid +causing undefined behavior on LA464. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Set the unused bits in operand[3] to 0. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c: Removed. + * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: New test. +--- + gcc/config/loongarch/lasx.md | 9 ++- + .../loongarch/vector/lasx/lasx-xvpermi_q.c | 64 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xvpremi.c | 19 ------ + 3 files changed, 72 insertions(+), 20 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c + delete mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 95c6bae20..b4aa8e261 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -635,6 +635,8 @@ + (set_attr "mode" "<MODE>")) + + ;; xvpermi.q ++;; Unused bits in operands[3] need be set to 0 to avoid ++;; causing undefined behavior on LA464. + (define_insn "lasx_xvpermi_q_<LASX:mode>" + [(set (match_operand:LASX 0 "register_operand" "=f") + (unspec:LASX +@@ -643,7 +645,12 @@ + (match_operand 3 "const_uimm8_operand")] + UNSPEC_LASX_XVPERMI_Q))] + "ISA_HAS_LASX" +- "xvpermi.q\t%u0,%u2,%3" ++{ ++ int mask = 0x33; ++ mask &= INTVAL (operands[3]); ++ operands[3] = GEN_INT (mask); ++ return "xvpermi.q\t%u0,%u2,%3"; ++} + (set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +new file mode 100644 +index 000000000..dbc29d2fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +@@ -0,0 +1,64 @@ ++/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ ++#include "../simd_correctness_check.h" ++#include <lasxintrin.h> ++ ++int ++main () ++{ ++ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; ++ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; ++ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; ++ ++ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; ++ long int long_op0, long_op1, long_op2, lont_out, lont_result; ++ long int long_int_out, long_int_result; ++ unsigned int unsigned_int_out, unsigned_int_result; ++ unsigned long int unsigned_long_int_out, unsigned_long_int_result; ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op0[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[0]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op1[3]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[2]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[1]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[0]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_result[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, 
__m256i_op1, 0x2a); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op0[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[0]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op1[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[2]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_op1[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[0]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[2]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[0]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op1[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_op1[1]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[0]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +deleted file mode 100644 +index e9fc1d7d3..000000000 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c ++++ /dev/null +@@ -1,19 +0,0 @@ +-/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ +-#include "../simd_correctness_check.h" +-#include <lasxintrin.h> +- +-int +-main () +-{ +- __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; +- __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; +- __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; +- +- int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; +- long int long_op0, long_op1, long_op2, lont_out, lont_result; +- long int long_int_out, long_int_result; +- unsigned int unsigned_int_out, unsigned_int_result; +- unsigned long int unsigned_long_int_out, unsigned_long_int_result; +- +- return 0; +-} +-- +2.43.0 +
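On the 0x33 mask chosen above: xvpermi.q's ui8 immediate encodes two 2-bit 128-bit-lane selectors, in bits 1:0 and 5:4, and it is the remaining bits that trigger the LA464 problem when left set. The sanitisation is easy to model (illustrative C, not from the patch):

```c
/* Mirrors "mask &= INTVAL (operands[3])" with mask = 0x33 in the insn
   above: keep the two 2-bit lane selectors (bits 1:0 and 5:4), clear
   the bits whose contents are undefined on LA464.  */
#include <stdio.h>

static unsigned
sanitize_xvpermi_q_imm (unsigned imm8)
{
  return imm8 & 0x33;
}

int
main (void)
{
  /* The 0xb9 used by the new test becomes 0x31: the selected lanes are
     unchanged, only the undefined bits are dropped.  */
  printf ("0x%02x\n", sanitize_xvpermi_q_imm (0xb9));
  return 0;
}
```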
View file
_service:tar_scm:0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
Deleted
@@ -1,84 +0,0 @@ -From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Add bytes_below_hard_fp to frame info - -Following on from the previous bytes_below_saved_regs patch, this one -records the number of bytes that are below the hard frame pointer. -This eventually replaces below_hard_fp_saved_regs_size. - -If a frame pointer is not needed, the epilogue adds final_adjust -to the stack pointer before restoring registers: - - aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); - -Therefore, if the epilogue needs to restore the stack pointer from -the hard frame pointer, the directly corresponding offset is: - - -bytes_below_hard_fp + final_adjust - -i.e. go from the hard frame pointer to the bottom of the frame, -then add the same amount as if we were using the stack pointer -from the outset. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. - (aarch64_expand_epilogue): Use it instead of - below_hard_fp_saved_regs_size. ---- - gcc/config/aarch64/aarch64.cc | 6 +++--- - gcc/config/aarch64/aarch64.h | 5 +++++ - 2 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 94e1b6865849..c7d84245fbfc 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void) - of the callee save area. */ - bool saves_below_hard_fp_p = maybe_ne (offset, 0); - frame.below_hard_fp_saved_regs_size = offset; -+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 final_adjust = frame.final_adjust; - poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; -- poly_int64 below_hard_fp_saved_regs_size -- = frame.below_hard_fp_saved_regs_size; -+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; - unsigned reg2 = frame.wb_pop_candidate2; - unsigned int last_gpr = (frame.is_scs_enabled -@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall) - is restored on the instruction doing the writeback. */ - aarch64_add_offset (Pmode, stack_pointer_rtx, - hard_frame_pointer_rtx, -- -callee_offset - below_hard_fp_saved_regs_size, -+ -bytes_below_hard_fp + final_adjust, - tmp1_rtx, tmp0_rtx, callee_adjust == 0); - else - /* The case where we need to re-use the register here is very rare, so -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 1e105e12db8d..de68ff7202fc 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame - are saved below the hard frame pointer. */ - poly_int64 below_hard_fp_saved_regs_size; - -+ /* The number of bytes between the bottom of the static frame (the bottom -+ of the outgoing arguments) and the hard frame pointer. This value is -+ always a multiple of STACK_BOUNDARY. */ -+ poly_int64 bytes_below_hard_fp; -+ - /* Offset from the base of the frame (incomming SP) to the - top of the locals area. This value is always a multiple of - STACK_BOUNDARY. */ --- -2.43.5 -
View file
_service:tar_scm:0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
Added
@@ -0,0 +1,2164 @@ +From 8fa9788ac64a9ea5dc92c61c8f2ec11075cd17ec Mon Sep 17 00:00:00 2001 +From: XingYushuai <xingyushuai@huawei.com> +Date: Thu, 15 Dec 2022 14:34:16 +0800 +Subject: PATCH 003/157 Add Crc32 Optimization in Gzip For crc32 algorithm in + APBC int_gzip. + +Match crc32 lookup table algorithm. An example for crc32 lookup table +elg: ```c do { c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); } while (--n); + +Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c` +Node: The cpu you use needs to support the crc32 instructions +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 4 + + gcc/config/aarch64/aarch64-builtins.cc | 30 + + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.cc | 12 + + gcc/doc/invoke.texi | 6 +- + gcc/doc/tm.texi | 9 + + gcc/doc/tm.texi.in | 2 + + gcc/match.pd | 23 + + gcc/passes.def | 1 + + gcc/target.def | 14 + + .../tree-ssa/loop-crc-loop-condition-fail.c | 85 ++ + .../tree-ssa/loop-crc-loop-form-fail-2.c | 90 ++ + .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 112 ++ + .../gcc.dg/tree-ssa/loop-crc-sucess.c | 83 + + .../tree-ssa/loop-crc-table-check-fail.c | 114 ++ + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-crc.cc | 1333 +++++++++++++++++ + 19 files changed, 1921 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c + create mode 100644 gcc/tree-ssa-loop-crc.cc + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 5cd838270..2b9f025dc 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1649,6 +1649,7 @@ OBJS = \ + tree-ssa-ifcombine.o \ + tree-ssa-live.o \ + tree-ssa-loop-ch.o \ ++ tree-ssa-loop-crc.o \ + tree-ssa-loop-im.o \ + tree-ssa-loop-ivcanon.o \ + tree-ssa-loop-ivopts.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..42fb2fc19 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1119,6 +1119,10 @@ fcrypto-accel-aes + Common Var(flag_crypto_accel_aes) Init(0) Optimization + Perform crypto acceleration AES pattern matching. + ++floop-crc ++Common Var(flag_loop_crc) Optimization ++Do the loop crc conversion. ++ + fauto-inc-dec + Common Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. 
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 42276e7ca..3b952ef39 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -551,6 +551,12 @@ typedef struct + #define VAR1(T, N, MAP, FLAG, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + ++enum aarch64_crc_builtins{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ + enum aarch64_builtins + { + AARCH64_BUILTIN_MIN, +@@ -1812,6 +1818,30 @@ aarch64_general_builtin_decl (unsigned code, bool) + return aarch64_builtin_decls[code]; + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE */ ++unsigned ++get_crc_builtin_code(unsigned code, bool) ++{ ++ if (code > AARCH64_BUILTIN_CRC32W) ++ return AARCH64_BUILTIN_MIN; ++ ++ unsigned res = AARCH64_BUILTIN_MIN; ++ switch (code) { ++ case AARCH64_BUILTIN_CRC32B: ++ res = AARCH64_BUILTIN_crc32b; ++ break; ++ case AARCH64_BUILTIN_CRC32H: ++ res = AARCH64_BUILTIN_crc32h; ++ break; ++ case AARCH64_BUILTIN_CRC32W: ++ res = AARCH64_BUILTIN_crc32w; ++ break; ++ default: ++ break; ++ } ++ return res; ++} ++ + typedef enum + { + SIMD_ARG_COPY_TO_REG, +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 475d174dd..853197ee9 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -994,6 +994,7 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, + gimple_stmt_iterator *); + rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); + tree aarch64_general_builtin_decl (unsigned, bool); ++unsigned get_crc_builtin_code(unsigned , bool); + tree aarch64_general_builtin_rsqrt (unsigned int); + tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); + void handle_arm_acle_h (void); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..280e0b618 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -15210,6 +15210,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p) + gcc_unreachable (); + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE. */ ++static unsigned ++aarch64_get_crc_builtin_code(unsigned code, bool initialize_p) ++{ ++ unsigned subcode = get_crc_builtin_code(code,initialize_p); ++ unsigned res = subcode << AARCH64_BUILTIN_SHIFT; ++ return res; ++} ++ + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. */ + +@@ -27677,6 +27686,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL aarch64_builtin_decl + ++#undef TARGET_GET_CRC_BUILTIN_CODE ++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code ++ + #undef TARGET_BUILTIN_RECIPROCAL + #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7ca60dd64..c3ce148b0 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -537,7 +537,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol + -fivopts -fkeep-inline-functions -fkeep-static-functions @gol + -fkeep-static-consts -flimit-function-alignment -flive-range-shrinkage @gol +--floop-block -floop-interchange -floop-strip-mine @gol ++-floop-block -floop-crc -floop-interchange -floop-strip-mine @gol + -floop-unroll-and-jam -floop-nest-optimize @gol + -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol + -flto-partition=@var{alg} -fmerge-all-constants @gol +@@ -12159,6 +12159,10 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations + are also performed by the code generator isl, like index splitting and + dead code elimination in loops. + ++@item -floop-crc ++@opindex floop-crc ++Do the loop crc conversion ++ + @item -floop-nest-optimize + @opindex floop-nest-optimize + Enable the isl based loop nest optimizer. This is a generic loop nest +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 851d31c18..5a1e0fe43 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11658,6 +11658,15 @@ If @var{code} is out of range the function should return + @code{error_mark_node}. + @end deftypefn + ++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) ++Define this hook to get crc32 builtin code. It should be a function that ++returns the crc32 builtin function code @var{code}. ++If there is no such builtin and it cannot be initialized at this time ++if @var{initialize_p} is true the function should return @code{NULL_TREE}. ++If @var{code} is out of range the function should return ++@code{error_mark_node}. ++@end deftypefn ++ + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) +
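Since the inline example in the commit message above is mangled by this view, here is a self-contained sketch of the gzip-style table-lookup loop that the new pass (gcc/tree-ssa-loop-crc.cc in the diffstat) is meant to recognize; the function name is a placeholder and the table contents are elided:

```c
/* Gzip-style CRC32 lookup-table loop of the shape -floop-crc targets.
   crc_32_tab stands for the usual 256-entry CRC table.
   Build with: gcc -O3 -march=armv8.1-a -floop-crc file.c  */
extern const unsigned long crc_32_tab[256];

unsigned long
updcrc (unsigned long c, const unsigned char *s, unsigned n)
{
  do
    c = crc_32_tab[((int) c ^ (*s++)) & 0xff] ^ (c >> 8);
  while (--n);
  return c;
}
```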
View file
_service:tar_scm:0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch
Added
@@ -0,0 +1,253 @@ +From a321a294407781b2694fe9a3be0099fe38ccf13a Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 5 Jan 2024 15:38:25 +0800 +Subject: PATCH 102/188 LoongArch: Implement vec_init<M><N> where N is a LSX + vector mode + +This patch implements more vec_init optabs that can handle two LSX vectors producing a LASX +vector by concatenating them. When an lsx vector is concatenated with an LSX const_vector of +zeroes, the vec_concatz pattern can be used effectively. For example as below + +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +v8hi a, b; + +v16hi vec_initv16hiv8hi () +{ + return __builtin_shufflevector (a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); +} + +Before this patch: + +vec_initv16hiv8hi: + addi.d $r3,$r3,-64 + .cfi_def_cfa_offset 64 + xvrepli.h $xr0,0 + la.local $r12,.LANCHOR0 + xvst $xr0,$r3,0 + xvst $xr0,$r3,32 + vld $vr0,$r12,0 + vst $vr0,$r3,0 + vld $vr0,$r12,16 + vst $vr0,$r3,32 + xvld $xr1,$r3,32 + xvld $xr2,$r3,32 + xvld $xr0,$r3,0 + xvilvh.h $xr0,$xr1,$xr0 + xvld $xr1,$r3,0 + xvilvl.h $xr1,$xr2,$xr1 + addi.d $r3,$r3,64 + .cfi_def_cfa_offset 0 + xvpermi.q $xr0,$xr1,32 + jr $r1 + +After this patch: + +vec_initv16hiv8hi: + la.local $r12,.LANCHOR0 + vld $vr0,$r12,32 + vld $vr2,$r12,48 + xvilvh.h $xr1,$xr2,$xr0 + xvilvl.h $xr0,$xr2,$xr0 + xvpermi.q $xr1,$xr0,32 + xvst $xr1,$r4,0 + jr $r1 + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_initv32qiv16qi): Rename to .. + (vec_init<mode><lasxhalf>): .. this, and extend to mode. + (@vec_concatz<mode>): New insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): + Handle VALS containing two vectors. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c: New test. +--- + gcc/config/loongarch/lasx.md | 26 +++++++- + gcc/config/loongarch/loongarch.cc | 44 +++++++++++-- + .../loongarch/vector/lasx/lasx-vec-init-2.c | 65 +++++++++++++++++++ + 3 files changed, 128 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b4aa8e261..803c5dd93 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -465,6 +465,11 @@ + (V16HI "w") + (V32QI "w")) + ++;; Half modes of all LASX vector modes, in lower-case. 
++(define_mode_attr lasxhalf [(V32QI "v16qi") (V16HI "v8hi")
++ (V8SI "v4si") (V4DI "v2di")
++ (V8SF "v4sf") (V4DF "v2df")])
++
+ (define_expand "vec_init<mode><unitmode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "")]
+@@ -474,9 +479,9 @@
+ DONE;
+ })
+
+-(define_expand "vec_initv32qiv16qi"
+- [(match_operand:V32QI 0 "register_operand")
+- (match_operand:V16QI 1 "")]
++(define_expand "vec_init<mode><lasxhalf>"
++ [(match_operand:LASX 0 "register_operand")
++ (match_operand:<VHMODE256_ALL> 1 "")]
+ "ISA_HAS_LASX"
+ {
+ loongarch_expand_vector_group_init (operands[0], operands[1]);
+@@ -577,6 +582,21 @@
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
++(define_insn "@vec_concatz<mode>"
++ [(set (match_operand:LASX 0 "register_operand" "=f")
++ (vec_concat:LASX
++ (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand")
++ (match_operand:<VHMODE256_ALL> 2 "const_0_operand")))]
++ "ISA_HAS_LASX"
++{
++ if (MEM_P (operands[1]))
++ return "vld\t%w0,%1";
++ else
++ return "vori.b\t%w0,%w1,0";
++}
++ [(set_attr "type" "simd_splat")
++ (set_attr "mode" "<MODE>")])
++
+ (define_insn "vec_concat<mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_concat:LASX
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index ddb32cea2..fccdc21a8 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -9842,10 +9842,46 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
+ void
+ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ {
+- rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
+- force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
+- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+- ops[1])));
++ machine_mode vmode = GET_MODE (target);
++ machine_mode half_mode = VOIDmode;
++ rtx low = XVECEXP (vals, 0, 0);
++ rtx high = XVECEXP (vals, 0, 1);
++
++ switch (vmode)
++ {
++ case E_V32QImode:
++ half_mode = V16QImode;
++ break;
++ case E_V16HImode:
++ half_mode = V8HImode;
++ break;
++ case E_V8SImode:
++ half_mode = V4SImode;
++ break;
++ case E_V4DImode:
++ half_mode = V2DImode;
++ break;
++ case E_V8SFmode:
++ half_mode = V4SFmode;
++ break;
++ case E_V4DFmode:
++ half_mode = V2DFmode;
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ if (high == CONST0_RTX (half_mode))
++ emit_insn (gen_vec_concatz (vmode, target, low, high));
++ else
++ {
++ if (!register_operand (low, half_mode))
++ low = force_reg (half_mode, low);
++ if (!register_operand (high, half_mode))
++ high = force_reg (half_mode, high);
++ emit_insn (gen_rtx_SET (target,
++ gen_rtx_VEC_CONCAT (vmode, low, high)));
++ }
+ }
+
+ /* Expand initialization of a vector which has all same elements.
*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +new file mode 100644 +index 000000000..7592198c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-vect-cost-model -mlasx" } */ ++/* { dg-final { scan-assembler-times "vld" 12 } } */ ++ ++ ++typedef char v16qi __attribute__ ((vector_size (16))); ++typedef char v32qi __attribute__ ((vector_size (32))); ++ ++typedef short v8hi __attribute__ ((vector_size (16))); ++typedef short v16hi __attribute__ ((vector_size (32))); ++ ++typedef int v4si __attribute__ ((vector_size (16))); ++typedef int v8si __attribute__ ((vector_size (32))); ++
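To make the new @vec_concatz<mode> path concrete: per the commit message, it fires when the high half of the 256-bit initializer is an LSX const_vector of zeroes, so the expander can emit a single vld/vori.b instead of a real two-register concatenation. A hypothetical source-level trigger (not one of the patch's tests), compiled with -O3 -mlasx:

```c
/* Hypothetical trigger for the vec_concatz path added above: the high
   128-bit half of the result is all zeros.  */
typedef short v8hi __attribute__ ((vector_size (16)));
typedef short v16hi __attribute__ ((vector_size (32)));

v16hi
widen_low (v8hi a)
{
  v8hi zero = { 0 };  /* remaining elements are implicitly zero */
  /* Indices 0-7 take the low half from A, 8-15 take it from ZERO.  */
  return __builtin_shufflevector (a, zero, 0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
}
```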
View file
_service:tar_scm:0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
Deleted
@@ -1,225 +0,0 @@ -From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Tweak aarch64_save/restore_callee_saves - -aarch64_save_callee_saves and aarch64_restore_callee_saves took -a parameter called start_offset that gives the offset of the -bottom of the saved register area from the current stack pointer. -However, it's more convenient for later patches if we use the -bottom of the entire frame as the reference point, rather than -the bottom of the saved registers. - -Doing that removes the need for the callee_offset field. -Other than that, this is not a win on its own. It only really -makes sense in combination with the follow-on patches. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove - callee_offset handling. - (aarch64_save_callee_saves): Replace the start_offset parameter - with a bytes_below_sp parameter. - (aarch64_restore_callee_saves): Likewise. - (aarch64_expand_prologue): Update accordingly. - (aarch64_expand_epilogue): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ - gcc/config/aarch64/aarch64.h | 4 --- - 2 files changed, 28 insertions(+), 32 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index c7d84245fbfc..e79551af41df 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void) - frame.final_adjust = 0; - frame.callee_adjust = 0; - frame.sve_callee_adjust = 0; -- frame.callee_offset = 0; - - frame.wb_pop_candidate1 = frame.wb_push_candidate1; - frame.wb_pop_candidate2 = frame.wb_push_candidate2; -@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void) - stp reg1, reg2, sp, bytes_below_saved_regs - stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, - } - - /* Emit code to save the callee-saved registers from register number START -- to LIMIT to the stack at the location starting at offset START_OFFSET, -- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P -- is true if the hard frame pointer has been set up. */ -+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP -+ bytes above the bottom of the static frame. Skip any write-back -+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard -+ frame pointer has been set up. 
 */
-
- static void
--aarch64_save_callee_saves (poly_int64 start_offset,
-+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
- unsigned start, unsigned limit, bool skip_wb,
- bool hard_fp_valid_p)
- {
-@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- poly_int64 sp_offset = offset;
-
-@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- else if (GP_REGNUM_P (regno)
- && (!offset.is_constant (&const_offset) || const_offset >= 512))
- {
-- gcc_assert (known_eq (start_offset, 0));
-- poly_int64 fp_offset
-- = frame.below_hard_fp_saved_regs_size;
-+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
- if (hard_fp_valid_p)
- base_rtx = hard_frame_pointer_rtx;
- else
-@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- }
-
- /* Emit code to restore the callee registers from register number START
-- up to and including LIMIT. Restore from the stack offset START_OFFSET,
-- skipping any write-back candidates if SKIP_WB is true. Write the
-- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
-+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
-+ bytes above the bottom of the static frame. Skip any write-back
-+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
-+ notes into CFI_OPS. */
-
- static void
--aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
- unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
- aarch64_frame &frame = cfun->machine->frame;
-@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void)
- HOST_WIDE_INT callee_adjust = frame.callee_adjust;
- poly_int64 final_adjust = frame.final_adjust;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-- poly_int64 below_hard_fp_saved_regs_size
-- = frame.below_hard_fp_saved_regs_size;
- unsigned reg1 = frame.wb_push_candidate1;
- unsigned reg2 = frame.wb_push_candidate2;
- bool emit_frame_chain = frame.emit_frame_chain;
-@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void)
- - frame.hard_fp_offset);
- gcc_assert (known_ge (chain_offset, 0));
-
-- /* The offset of the bottom of the save area from the current SP. */
-- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
-+ /* The offset of the current SP from the bottom of the static frame.
*/ -+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void) - { - reg1 = R29_REGNUM; - reg2 = R30_REGNUM; -- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, -+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, - false, false); - } - else -@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void) - emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - -- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - if (maybe_ne (sve_callee_adjust, 0)) -@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void) - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, - sve_callee_adjust, - !frame_pointer_needed, false); -- saved_regs_offset += sve_callee_adjust; -+ bytes_below_sp -= sve_callee_adjust; - } -- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, - false, emit_frame_chain); -- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - - /* We may need to probe the final adjustment if it is larger than the guard - that is assumed by the called. */ -+ gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); - } -@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 initial_adjust = frame.initial_adjust; - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; -- poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; - poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; -@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall) - - /* Restore the vector registers before the predicate registers, - so that we can use P4 as a temporary for big-endian SVE frames. */ -- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, - callee_adjust != 0, &cfi_ops); -- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, - false, &cfi_ops); - if (maybe_ne (sve_callee_adjust, 0)) - aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall)
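A toy recomputation (made-up sizes) of the re-based save-slot addressing this deleted patch introduced, showing that measuring from the bottom of the static frame gives the same SP-relative offset as the old start_offset scheme:

```c
/* Mirrors offset = frame.reg_offset[regno]
   + frame.bytes_below_saved_regs - bytes_below_sp from the hunks above;
   all sizes are invented.  */
#include <assert.h>

int main (void)
{
  long reg_offset = 8;               /* slot within the save area        */
  long bytes_below_saved_regs = 48;  /* frame below the first save slot  */
  long bytes_below_sp = 48;          /* SP currently at the frame bottom */
  long start_offset = bytes_below_saved_regs - bytes_below_sp; /* old ref */

  long new_offset = reg_offset + bytes_below_saved_regs - bytes_below_sp;
  long old_offset = start_offset + reg_offset;
  assert (new_offset == old_offset);
  return 0;
}
```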
View file
_service:tar_scm:0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch
Added
@@ -0,0 +1,533 @@ +From 901663758281d4ce87a75e4d6e45de621b65f0cb Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:07 +0800 +Subject: PATCH 103/188 LoongArch: Handle ISA evolution switches along with + other options + +gcc/ChangeLog: + + * config/loongarch/genopts/genstr.sh: Prepend the isa_evolution + variable with the common la_ prefix. + * config/loongarch/genopts/loongarch.opt.in: Mark ISA evolution + flags as saved using TargetVariable. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-def.h: Define evolution_set to + mark changes to the -march default. + * config/loongarch/loongarch-driver.cc: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Define and use ISA evolution + conditions around the la_target structure. + * config/loongarch/loongarch.cc: Same. + * config/loongarch/loongarch.md: Same. + * config/loongarch/loongarch-builtins.cc: Same. + * config/loongarch/loongarch-c.cc: Same. + * config/loongarch/lasx.md: Same. + * config/loongarch/lsx.md: Same. + * config/loongarch/sync.md: Same. +--- + gcc/config/loongarch/genopts/genstr.sh | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 6 ++--- + gcc/config/loongarch/lasx.md | 4 ++-- + gcc/config/loongarch/loongarch-builtins.cc | 6 ++--- + gcc/config/loongarch/loongarch-c.cc | 2 +- + gcc/config/loongarch/loongarch-def.h | 5 +++- + gcc/config/loongarch/loongarch-driver.cc | 5 ++-- + gcc/config/loongarch/loongarch-opts.cc | 17 ++++++++++++- + gcc/config/loongarch/loongarch-opts.h | 24 +++++++++++++++---- + gcc/config/loongarch/loongarch.cc | 24 ++++++++----------- + gcc/config/loongarch/loongarch.md | 12 +++++----- + gcc/config/loongarch/loongarch.opt | 16 ++++++------- + gcc/config/loongarch/lsx.md | 4 ++-- + gcc/config/loongarch/sync.md | 22 ++++++++--------- + 14 files changed, 90 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index bcc616e98..391eca121 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -107,7 +107,7 @@ EOF + print("") + print("m"$3) + gsub(/-/, "_", $3) +- print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") + $1=""; $2=""; $3="" + sub(/^ */, "", $0) + print($0) +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 102202b03..a866dab84 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -259,6 +259,6 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. +-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. 
++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 803c5dd93..fdfd65e4a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1540,7 +1540,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRECIPE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +@@ -1573,7 +1573,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRSQRTE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 85849ed29..e3b4dbc52 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,9 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) +-AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +-AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) ++AVAIL_ALL (frecipe, ISA_HAS_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && ISA_HAS_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index a89477a74..df2a482ad 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,7 +102,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ if (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE) + builtin_define ("__loongarch_frecipe"); + + if (ISA_HAS_LSX) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index f8f36f0e2..9e5eee0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -132,8 +132,11 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ int64_t evolution_set; + +- loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa () : ++ base (0), fpu (0), simd (0), evolution (0), evolution_set (0) ++ {} + loongarch_isa base_ (int _base) { base = _base; return *this; } + loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } + loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b3626984d..b84a6eaf7 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -42,9 +42,10 @@ extern struct obstack opts_obstack; + const char* + la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + { +- /* Initialize all fields of la_target to -1 */ ++ /* Initialize all fields of la_target. 
*/ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index d31becc67..935d09f45 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -140,7 +140,9 @@ static int with_default_simd = 0; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel) ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolution, ++ HOST_WIDE_INT isa_evolution_set) + { + if (!target) + return; +@@ -148,6 +150,8 @@ loongarch_init_target (struct loongarch_target *target, + target->cpu_tune = cpu_tune; + target->isa.fpu = fpu; + target->isa.simd = simd; ++ target->isa.evolution = isa_evolution; ++ target->isa.evolution_set = isa_evolution_set; + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; +@@ -184,6 +188,9 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + ++ int64_t isa_evolution = target->isa.evolution; ++ int64_t isa_evolution_set = target->isa.evolution_set; ++ + /* 1. Target ABI */ + if (constrained.abi_base) + t.abi.base = target->abi.base; +@@ -394,6 +401,13 @@ config_target_isa: + } + } + ++ /* Apply the ISA evolution feature switches from the user. */ ++ HOST_WIDE_INT isa_evolution_orig = t.isa.evolution; ++ t.isa.evolution &= ~(~isa_evolution & isa_evolution_set); ++ t.isa.evolution |= isa_evolution & isa_evolution_set; ++
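The two-line mask update in the hunk above is the core of the change: bits the user explicitly set or cleared override the -march default, and every other evolution bit keeps its default. A toy model with made-up flag values (OPTION_MASK_ISA_FRECIPE is assumed here for illustration; only OPTION_MASK_ISA_DIV32 appears verbatim in this series):

```c
/* Toy model of applying user ISA-evolution switches over a default.  */
#include <assert.h>

#define OPTION_MASK_ISA_FRECIPE 0x1  /* assumed flag value */
#define OPTION_MASK_ISA_DIV32   0x2  /* assumed flag value */

int main (void)
{
  long evolution = OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_DIV32;
  long user_value = 0;                        /* user passed -mno-frecipe  */
  long user_set   = OPTION_MASK_ISA_FRECIPE;  /* only this bit was touched */

  evolution &= ~(~user_value & user_set);  /* clear bits the user disabled */
  evolution |= user_value & user_set;      /* set bits the user enabled    */

  assert (evolution == OPTION_MASK_ISA_DIV32);
  return 0;
}
```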
View file
_service:tar_scm:0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
Added
@@ -0,0 +1,34 @@ +From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Fri, 23 Aug 2024 15:14:04 +0800 +Subject: PATCH 004/157 SME Remove hip09 and hip11 in aarch64-cores.def to + backport SME + +Will apply it in the end. +--- + gcc/config/aarch64/aarch64-cores.def | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 601b72abb..70b11eb80 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) +-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 +
View file
_service:tar_scm:0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
Deleted
@@ -1,44 +0,0 @@ -From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:51 +0100 -Subject: PATCH aarch64: Only calculate chain_offset if there is a chain - -After previous patches, it is no longer necessary to calculate -a chain_offset in cases where there is no chain record. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the - calculation of chain_offset into the emit_frame_chain block. ---- - gcc/config/aarch64/aarch64.cc | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e79551af41df..d71a042d6112 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void) - if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); - -- /* The offset of the frame chain record (if any) from the current SP. */ -- poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -- gcc_assert (known_ge (chain_offset, 0)); -- - /* The offset of the current SP from the bottom of the static frame. */ - poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -+ /* The offset of the frame chain record (if any) from the current SP. */ -+ poly_int64 chain_offset = (initial_adjust + callee_adjust -+ - frame.hard_fp_offset); -+ gcc_assert (known_ge (chain_offset, 0)); -+ - if (callee_adjust == 0) - { - reg1 = R29_REGNUM; --- -2.43.5 -
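For reference, the moved computation with toy numbers, checking the invariant that the retained gcc_assert enforces:

```c
/* Toy numbers for chain_offset = initial_adjust + callee_adjust
   - frame.hard_fp_offset, now computed only when emit_frame_chain.  */
#include <assert.h>

int main (void)
{
  long initial_adjust = 96, callee_adjust = 32, hard_fp_offset = 112;
  long chain_offset = initial_adjust + callee_adjust - hard_fp_offset;
  assert (chain_offset >= 0);  /* mirrors gcc_assert (known_ge (...)) */
  return 0;
}
```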
View file
_service:tar_scm:0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
Added
@@ -0,0 +1,336 @@ +From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 18 May 2022 16:02:12 +0100 +Subject: PATCH 005/157 BackportSME AArch64: Cleanup CPU option + processing code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f + +The --with-cpu/--with-arch configure option processing not only checks valid +arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask. +This isn't used however since a --with-cpu is translated into a -mcpu option +which is processed as if written on the command-line (so TARGET_CPU_DEFAULT +is never accessed). + +So remove all the complex processing and bitmask, and just validate the +option. Fix a bug that always reports valid architecture extensions as invalid. +As a result the CPU processing in aarch64.c can be simplified. + +gcc/ + * config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch + processing. Add support for architectural extensions. + * config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove + AARCH64_CPU_DEFAULT_FLAGS. + (TARGET_CPU_NBITS): Remove. + (TARGET_CPU_MASK): Remove. + * config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define. + (get_tune_cpu): Assert CPU is always valid. + (get_arch): Assert architecture is always valid. + (aarch64_override_options): Cleanup CPU selection code and simplify logic. + (aarch64_option_restore): Remove unnecessary checks on tune. +--- + gcc/config.gcc | 43 +------------ + gcc/config/aarch64/aarch64.cc | 115 +++++++++------------------------- + gcc/config/aarch64/aarch64.h | 9 +-- + 3 files changed, 32 insertions(+), 135 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 8fdde1576..3be450471 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4190,8 +4190,6 @@ case "${target}" in + pattern=AARCH64_CORE + fi + +- ext_mask=AARCH64_CPU_DEFAULT_FLAGS +- + # Find the base CPU or ARCH id in aarch64-cores.def or + # aarch64-arches.def + if x"$base_val" = x \ +@@ -4199,23 +4197,6 @@ case "${target}" in + ${srcdir}/config/aarch64/$def \ + > /dev/null; then + +- if $which = arch ; then +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- # Extract the architecture flags from aarch64-arches.def +- ext_mask=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/)$//' | \ +- sed -e 's/^.*,//'` +- else +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- fi +- + # Disallow extensions in --with-tune=cortex-a53+crc. + if $which = tune && x"$ext_val" != x ; then + echo "Architecture extensions not supported in --with-$which=$val" 1>&2 +@@ -4246,25 +4227,7 @@ case "${target}" in + grep "^\"$base_ext\""` + + if x"$base_ext" = x \ +- || -n $opt_line ; then +- +- # These regexp extract the elements based on +- # their group match index in the regexp. 
+- ext_canon=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\2/"`
+- ext_on=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\3/"`
+- ext_off=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\4/"`
+-
+- if [ $ext = $base_ext ]; then
+- # Adding extension
+- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")"
+- else
+- # Removing extension
+- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")"
+- fi
+-
++ || [ x"$opt_line" != x ]; then
+ true
+ else
+ echo "Unknown extension used in --with-$which=$val" 1>&2
+@@ -4273,10 +4236,6 @@ case "${target}" in
+ ext_val=`echo $ext_val | sed -e 's/[a-z0-9]\+//'`
+ done
+
+- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)"
+- if [ x"$base_id" != x ]; then
+- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask"
+- fi
+ true
+ else
+ # Allow --with-$which=native.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 7c62ddb2a..ba888beb0 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table =
+ { NULL, 0, 0, false, false, false, false, NULL, NULL }
+ };
+
+-#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
+-
+ /* An ISA extension in the co-processor and main instruction set space. */
+ struct aarch64_option_extension
+ {
+@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
+ return false;
+ }
+
+-static_assert (TARGET_CPU_generic < TARGET_CPU_MASK,
+- "TARGET_CPU_NBITS is big enough");
+-
+-/* Return the CPU corresponding to the enum CPU.
+- If it doesn't specify a cpu, return the default. */
++/* Return the CPU corresponding to the enum CPU. */
+
+ static const struct processor *
+ aarch64_get_tune_cpu (enum aarch64_processor cpu)
+ {
+- if (cpu != aarch64_none)
+- return &all_cores[cpu];
++ gcc_assert (cpu != aarch64_none);
+
+- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that
+- encode the default cpu as selected by the --with-cpu GCC configure option
+- in config.gcc.
+- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
+- flags mechanism should be reworked to make it more sane. */
+- return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
++ return &all_cores[cpu];
+ }
+
+-/* Return the architecture corresponding to the enum ARCH.
+- If it doesn't specify a valid architecture, return the default. */
++/* Return the architecture corresponding to the enum ARCH. */
+
+ static const struct processor *
+ aarch64_get_arch (enum aarch64_arch arch)
+ {
+- if (arch != aarch64_no_arch)
+- return &all_architectures[arch];
+-
+- const struct processor *cpu
+- = &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
++ gcc_assert (arch != aarch64_no_arch);
+
+- return &all_architectures[cpu->arch];
++ return &all_architectures[arch];
+ }
+
+ /* Return the VG value associated with -msve-vector-bits= value VALUE. */
+@@ -18481,10 +18464,6 @@ aarch64_override_options (void)
+ uint64_t arch_isa = 0;
+ aarch64_isa_flags = 0;
+
+- bool valid_cpu = true;
+- bool valid_tune = true;
+- bool valid_arch = true;
+-
+ selected_cpu = NULL;
+ selected_arch = NULL;
+ selected_tune = NULL;
+@@ -18499,77 +18478,56 @@ aarch64_override_options (void)
+ If either of -march or -mtune is given, they override their
+ respective component of -mcpu.
*/ + if (aarch64_cpu_string) +- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, +- &cpu_isa); ++ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); + + if (aarch64_arch_string) +- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch, +- &arch_isa); ++ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); + + if (aarch64_tune_string) +- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune); ++ aarch64_validate_mtune (aarch64_tune_string, &selected_tune); + + #ifdef SUBTARGET_OVERRIDE_OPTIONS
View file
_service:tar_scm:0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch
Added
@@ -0,0 +1,220 @@ +From 282b0847a86fab49fb3582371647fa4cb2d941ed Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:08 +0800 +Subject: PATCH 104/188 LoongArch: Rename ISA_BASE_LA64V100 to ISA_BASE_LA64 + +LoongArch ISA manual v1.10 suggests that software should not depend on +the ISA version number for marking processor features. The ISA version +number is now defined as a collective name of individual ISA evolutions. +Since there is a independent ISA evolution mask now, we can drop the +version information from the base ISA. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Rename. + * config/loongarch/genopts/loongarch.opt.in: Same. + * config/loongarch/loongarch-cpu.cc: Same. + * config/loongarch/loongarch-def.cc: Same. + * config/loongarch/loongarch-def.h: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Same. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/loongarch.opt: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 2 +- + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++------- + gcc/config/loongarch/loongarch-def.h | 6 +++--- + gcc/config/loongarch/loongarch-opts.cc | 10 +++++----- + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch-str.h | 2 +- + gcc/config/loongarch/loongarch.opt | 2 +- + 9 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 411ad5696..ce70b8b9c 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -29,7 +29,7 @@ STR_CPU_LA464 la464 + STR_CPU_LA664 la664 + + # Base architecture +-STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64 la64 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a866dab84..851d8d1f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -33,7 +33,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(@@STR_ISA_BASE_LA64@@) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7e0625835..551d4f72c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -133,7 +133,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + switch (cpucfg_cache1 & 0x3) + { + case 0x02: +- tmp = ISA_BASE_LA64V100; ++ tmp = ISA_BASE_LA64; + break; + + default: +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 843be78e4..533dd0af2 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -48,16 +48,16 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + array_arch<loongarch_isa> () + .set (CPU_LOONGARCH64, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) + .set (CPU_LA464, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ 
(ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +@@ -153,7 +153,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); ++ .set (ISA_BASE_LA64, STR_ISA_BASE_LA64); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +@@ -189,15 +189,15 @@ array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES> + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64))) + .set (ABI_BASE_LP64F, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU32))) + .set (ABI_BASE_LP64S, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, +- loongarch_isa ().base_ (ISA_BASE_LA64V100))); ++ loongarch_isa ().base_ (ISA_BASE_LA64))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 9e5eee0e2..a133ea265 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,9 +55,9 @@ along with GCC; see the file COPYING3. If not see + + /* enum isa_base */ + +-/* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-#define N_ISA_BASE_TYPES 1 ++/* LoongArch64 */ ++#define ISA_BASE_LA64 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 935d09f45..cf4c7bc93 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -567,17 +567,17 @@ isa_default_abi (const struct loongarch_isa *isa) + switch (isa->fpu) + { + case ISA_EXT_FPU64: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64D; + break; + + case ISA_EXT_FPU32: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64F; + break; + + case ISA_EXT_NONE: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64S; + break; + +@@ -596,8 +596,8 @@ isa_base_compat_p (const struct loongarch_isa *set1, + { + switch (set2->base) + { +- case ISA_BASE_LA64V100: +- return (set1->base >= ISA_BASE_LA64V100); ++ case ISA_BASE_LA64: ++ return (set1->base >= ISA_BASE_LA64); + + default: + gcc_unreachable (); +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 204338553..463812136 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -79,7 +79,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 
a8821acb0..2251df38b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA464 "la464" + #define STR_CPU_LA664 "la664" +
View file
_service:tar_scm:0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
Deleted
@@ -1,91 +0,0 @@
-From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:51 +0100
-Subject: [PATCH] aarch64: Rename locals_offset to bytes_above_locals
-MIME-Version: 1.0
-Content-Type: text/plain; charset=utf8
-Content-Transfer-Encoding: 8bit
-
-locals_offset was described as:
-
- /* Offset from the base of the frame (incomming SP) to the
- top of the locals area. This value is always a multiple of
- STACK_BOUNDARY. */
-
-This is implicitly an “upside down” view of the frame: the incoming
-SP is at offset 0, and anything N bytes below the incoming SP is at
-offset N (rather than -N).
-
-However, reg_offset instead uses a “right way up” view; that is,
-it views offsets in address terms. Something above X is at a
-positive offset from X and something below X is at a negative
-offset from X.
-
-Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
-target-independent code views offsets in address terms too:
-locals are allocated at negative offsets to virtual_stack_vars.
-
-It seems confusing to have *_offset fields of the same structure
-using different polarities like this. This patch tries to avoid
-that by renaming locals_offset to bytes_above_locals.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
- (aarch64_frame::bytes_above_locals): ...this.
- * config/aarch64/aarch64.cc (aarch64_layout_frame)
- (aarch64_initial_elimination_offset): Update accordingly.
----
- gcc/config/aarch64/aarch64.cc | 6 +++---
- gcc/config/aarch64/aarch64.h | 6 +++---
- 2 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index d71a042d6112..d4ec352ba98a 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void)
- STACK_BOUNDARY / BITS_PER_UNIT));
- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
-
-- frame.locals_offset = frame.saved_varargs_size;
-+ frame.bytes_above_locals = frame.saved_varargs_size;
-
- frame.initial_adjust = 0;
- frame.final_adjust = 0;
-@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
- return frame.hard_fp_offset;
-
- if (from == FRAME_POINTER_REGNUM)
-- return frame.hard_fp_offset - frame.locals_offset;
-+ return frame.hard_fp_offset - frame.bytes_above_locals;
- }
-
- if (to == STACK_POINTER_REGNUM)
- {
- if (from == FRAME_POINTER_REGNUM)
-- return frame.frame_size - frame.locals_offset;
-+ return frame.frame_size - frame.bytes_above_locals;
- }
-
- return frame.frame_size;
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 94fca4b94716..bf46e6124aa9 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame
- always a multiple of STACK_BOUNDARY. */
- poly_int64 bytes_below_hard_fp;
-
-- /* Offset from the base of the frame (incomming SP) to the
-- top of the locals area. This value is always a multiple of
-- STACK_BOUNDARY. */
-- poly_int64 locals_offset;
-+ /* The number of bytes between the top of the locals area and the top
-+ of the frame (the incomming SP). This value is always a multiple of
-+ STACK_BOUNDARY. */
-+ poly_int64 bytes_above_locals;
-
- /* Offset from the base of the frame (incomming SP) to the
- hard_frame_pointer. This value is always a multiple of
---
-2.43.5
-
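A toy illustration (made-up sizes) of the renamed field's "upside down" polarity: bytes_above_locals is a distance measured downward from the incoming SP, so the elimination offsets subtract it from frame-top quantities:

```c
/* Mirrors aarch64_initial_elimination_offset after the rename:
   FRAME_POINTER_REGNUM -> STACK_POINTER_REGNUM uses
   frame_size - bytes_above_locals.  Sizes are invented.  */
#include <assert.h>

int main (void)
{
  long frame_size = 128;         /* whole static frame              */
  long saved_varargs_size = 16;  /* varargs spill above the locals  */
  long bytes_above_locals = saved_varargs_size;

  assert (frame_size - bytes_above_locals == 112);
  return 0;
}
```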
View file
_service:tar_scm:0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
Added
@@ -0,0 +1,528 @@ +From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 1 Jun 2022 16:46:36 +0100 +Subject: PATCH 006/157 BackportSME AArch64: Cleanup option processing + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1 + +Further cleanup option processing. Remove the duplication of global +variables for CPU and tune settings so that CPU option processing is +simplified even further. Move global variables that need save and +restore due to target option processing into aarch64.opt. This removes +the need for explicit saving/restoring and unnecessary reparsing of +options. + +gcc/ + * config/aarch64/aarch64.opt (explicit_tune_core): Rename to + selected_tune. + (explicit_arch): Rename to selected_arch. + (x_aarch64_override_tune_string): Remove. + (aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored. + (aarch64_override_tune_string): Add Save so it gets saved/restored. + * config/aarch64/aarch64.h (aarch64_architecture_version): Remove. + * config/aarch64/aarch64.cc (aarch64_architecture_version): Remove. + (processor): Remove archtecture_version field. + (selected_arch): Remove global. + (selected_cpu): Remove global. + (selected_tune): Remove global. + (aarch64_ra_sign_key): Move global to aarch64.opt so it is saved. + (aarch64_override_options_internal): Use aarch64_get_tune_cpu. + (aarch64_override_options): Further simplify code to only set + selected_arch and selected_tune globals. + (aarch64_option_save): Remove now that target options are saved. + (aarch64_option_restore): Remove redundant target option restores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use + AARCH64_ISA_V9. + * config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from... + * config/aarch64/aarch64-protos.h (aarch64_key_type): Remove. + (aarch64_ra_sign_key): Remove. +--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-opts.h | 6 + + gcc/config/aarch64/aarch64-protos.h | 8 -- + gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------ + gcc/config/aarch64/aarch64.h | 3 - + gcc/config/aarch64/aarch64.opt | 12 +- + 6 files changed, 76 insertions(+), 138 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index a4c407724..90d45e45d 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ? 1 : 4); +diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h +index 93572fe83..421648a15 100644 +--- a/gcc/config/aarch64/aarch64-opts.h ++++ b/gcc/config/aarch64/aarch64-opts.h +@@ -98,4 +98,10 @@ enum stack_protector_guard { + SSP_GLOBAL /* global canary */ + }; + ++/* The key type that -msign-return-address should use. 
*/ ++enum aarch64_key_type { ++ AARCH64_KEY_A, ++ AARCH64_KEY_B ++}; ++ + #endif +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 475d174dd..e60ce3c36 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -672,14 +672,6 @@ enum simd_immediate_check { + AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC + }; + +-/* The key type that -msign-return-address should use. */ +-enum aarch64_key_type { +- AARCH64_KEY_A, +- AARCH64_KEY_B +-}; +- +-extern enum aarch64_key_type aarch64_ra_sign_key; +- + extern struct tune_params aarch64_tune_params; + + /* The available SVE predicate patterns, known in the ACLE as "svpattern". */ +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ba888beb0..254ecfaa2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); + static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + +-/* Major revision number of the ARM Architecture implemented by the target. */ +-unsigned aarch64_architecture_version; +- + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; + +@@ -2931,7 +2928,6 @@ struct processor + enum aarch64_processor ident; + enum aarch64_processor sched_core; + enum aarch64_arch arch; +- unsigned architecture_version; + const uint64_t flags; + const struct tune_params *const tune; + }; +@@ -2940,9 +2936,9 @@ struct processor + static const struct processor all_architectures = + { + #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ +- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL}, ++ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, + #include "aarch64-arches.def" +- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} ++ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + + /* Processor cores implementing AArch64. */ +@@ -2950,23 +2946,13 @@ static const struct processor all_cores = + { + #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ +- all_architecturesAARCH64_ARCH_##ARCH.architecture_version, \ + FLAGS, &COSTS##_tunings}, + #include "aarch64-cores.def" +- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8, ++ {"generic", generic, cortexa53, AARCH64_ARCH_8A, + AARCH64_FL_FOR_ARCH8, &generic_tunings}, +- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} ++ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + +- +-/* Target specification. These are populated by the -march, -mtune, -mcpu +- handling code or by target attributes. */ +-static const struct processor *selected_arch; +-static const struct processor *selected_cpu; +-static const struct processor *selected_tune; +- +-enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; +- + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + +@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void) + /* Use the specified limit for the number of cases before using jump + tables at higher optimization levels. 
 */
+ if (optimize > 2
+- && selected_cpu->tune->max_case_values != 0)
+- return selected_cpu->tune->max_case_values;
++ && aarch64_tune_params.max_case_values != 0)
++ return aarch64_tune_params.max_case_values;
+ else
+ return optimize_size ? 8 : 11;
+ }
+@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
+ return;
+ }
+
++/* Return the CPU corresponding to the enum CPU. */
++
++static const struct processor *
++aarch64_get_tune_cpu (enum aarch64_processor cpu)
++{
++ gcc_assert (cpu != aarch64_none);
++
++ return &all_cores[cpu];
++}
++
++/* Return the architecture corresponding to the enum ARCH. */
++
++static const struct processor *
++aarch64_get_arch (enum aarch64_arch arch)
++{
++ gcc_assert (arch != aarch64_no_arch);
++
++ return &all_architectures[arch];
++}
++
+ /* Parse STRING looking for options in the format:
+ string :: option:string
+ option :: name=substring
+@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
+ void
+ aarch64_override_options_internal (struct gcc_options *opts)
+ {
+- aarch64_tune_flags = selected_tune->flags;
+- aarch64_tune = selected_tune->sched_core;
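The reason the commit message gives for moving these globals into saved target options is per-function retargeting; a hypothetical sketch of the kind of attribute use that forces tune state to be saved and restored across functions (assuming the usual aarch64 target-attribute support):

```c
/* Hypothetical per-function target options on aarch64; switching tuning
   mid-TU is what requires selected_tune & co. to be part of the saved
   option state rather than plain globals.  */
__attribute__ ((target ("tune=cortex-a57")))
int hot_loop (int x) { return x * x; }

int generic_code (int x) { return x * x; }
```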
_service:tar_scm:0105-LoongArch-Use-enums-for-constants.patch
Added
@@ -0,0 +1,181 @@ +From 907b35525c8abcdfe22152ebce6640dbe3905cce Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:09 +0800 +Subject: PATCH 105/188 LoongArch: Use enums for constants + +Target features constants from loongarch-def.h are currently defined as macros. +Switch to enums for better look in the debugger. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Define constants with + enums instead of Macros. +--- + gcc/config/loongarch/loongarch-def.h | 115 ++++++++++++++++----------- + 1 file changed, 67 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index a133ea265..28da3ae5f 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -23,12 +23,10 @@ along with GCC; see the file COPYING3. If not see + - ISA extensions (isa_ext), + - base ABI types (abi_base), + - ABI extension types (abi_ext). +- +- - code models (cmodel) +- - other command-line switches (switch) ++ - code models (cmodel) + + These values are primarily used for implementing option handling +- logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ logic in "loongarch.opt", "loongarch-driver.cc" and "loongarch-opt.cc". + + As for the result of this option handling process, the following + scheme is adopted to represent the final configuration: +@@ -53,30 +51,40 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-/* enum isa_base */ + +-/* LoongArch64 */ +-#define ISA_BASE_LA64 0 +-#define N_ISA_BASE_TYPES 1 ++/* ISA base */ ++enum { ++ ISA_BASE_LA64 = 0, /* LoongArch64 */ ++ N_ISA_BASE_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-/* enum isa_ext_* */ +-#define ISA_EXT_NONE 0 +-#define ISA_EXT_FPU32 1 +-#define ISA_EXT_FPU64 2 +-#define N_ISA_EXT_FPU_TYPES 3 +-#define ISA_EXT_SIMD_LSX 3 +-#define ISA_EXT_SIMD_LASX 4 +-#define N_ISA_EXT_TYPES 5 ++ ++/* ISA extensions */ ++enum { ++ ISA_EXT_NONE = 0, ++ ISA_EXT_FPU32 = 1, ++ ISA_EXT_FPU64 = 2, ++ N_ISA_EXT_FPU_TYPES = 3, ++ ISA_EXT_SIMD_LSX = 3, ++ ISA_EXT_SIMD_LASX = 4, ++ N_ISA_EXT_TYPES = 5 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_EXT_TYPES> + loongarch_isa_ext_strings; + +-/* enum abi_base */ +-#define ABI_BASE_LP64D 0 +-#define ABI_BASE_LP64F 1 +-#define ABI_BASE_LP64S 2 +-#define N_ABI_BASE_TYPES 3 ++ ++/* Base ABI */ ++enum { ++ ABI_BASE_LP64D = 0, ++ ABI_BASE_LP64F = 1, ++ ABI_BASE_LP64S = 2, ++ N_ABI_BASE_TYPES = 3 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + loongarch_abi_base_strings; + +@@ -90,28 +98,38 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + (abi_base == ABI_BASE_LP64S) + + +-/* enum abi_ext */ +-#define ABI_EXT_BASE 0 +-#define N_ABI_EXT_TYPES 1 ++/* ABI Extension */ ++enum { ++ ABI_EXT_BASE = 0, ++ N_ABI_EXT_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_EXT_TYPES> + loongarch_abi_ext_strings; + +-/* enum cmodel */ +-#define CMODEL_NORMAL 0 +-#define CMODEL_TINY 1 +-#define CMODEL_TINY_STATIC 2 +-#define CMODEL_MEDIUM 3 +-#define CMODEL_LARGE 4 +-#define CMODEL_EXTREME 5 +-#define N_CMODEL_TYPES 6 ++ ++/* Code Model */ ++enum { ++ CMODEL_NORMAL = 0, ++ CMODEL_TINY = 1, ++ CMODEL_TINY_STATIC = 2, ++ CMODEL_MEDIUM = 3, ++ CMODEL_LARGE = 4, ++ CMODEL_EXTREME = 5, ++ N_CMODEL_TYPES = 6 ++}; ++ + extern loongarch_def_array<const char *, N_CMODEL_TYPES> + 
loongarch_cmodel_strings; + +-/* enum explicit_relocs */ +-#define EXPLICIT_RELOCS_AUTO 0 +-#define EXPLICIT_RELOCS_NONE 1 +-#define EXPLICIT_RELOCS_ALWAYS 2 +-#define N_EXPLICIT_RELOCS_TYPES 3 ++ ++/* Explicit Reloc Type */ ++enum { ++ EXPLICIT_RELOCS_AUTO = 0, ++ EXPLICIT_RELOCS_NONE = 1, ++ EXPLICIT_RELOCS_ALWAYS = 2, ++ N_EXPLICIT_RELOCS_TYPES = 3 ++}; + + /* The common default value for variables whose assignments + are triggered by command-line options. */ +@@ -159,17 +177,18 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-/* CPU properties. */ +-/* index */ +-#define CPU_NATIVE 0 +-#define CPU_ABI_DEFAULT 1 +-#define CPU_LOONGARCH64 2 +-#define CPU_LA464 3 +-#define CPU_LA664 4 +-#define N_ARCH_TYPES 5 +-#define N_TUNE_TYPES 5 +- +-/* parallel tables. */ ++/* CPU model */ ++enum { ++ CPU_NATIVE = 0, ++ CPU_ABI_DEFAULT = 1, ++ CPU_LOONGARCH64 = 2, ++ CPU_LA464 = 3, ++ CPU_LA664 = 4, ++ N_ARCH_TYPES = 5, ++ N_TUNE_TYPES = 5 ++}; ++ ++/* CPU model properties */ + extern loongarch_def_array<const char *, N_ARCH_TYPES> + loongarch_cpu_strings; + extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES> +-- +2.43.0 +
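For readers unfamiliar with the motivation, here is a minimal standalone contrast between the two styles the patch above switches between; only the ABI_BASE_* names mirror loongarch-def.h, the program itself is hypothetical. With a macro the debugger only ever sees a raw number, while an enumerator keeps its symbolic name without losing constant-expression uses:

#include <cstdio>

/* Before: a bare macro.  */
#define OLD_ABI_BASE_LP64S 2

/* After: an anonymous enum.  Debuggers can print "ABI_BASE_LP64S",
   and the constants remain usable in switch labels, array bounds, etc.  */
enum {
  ABI_BASE_LP64D = 0,
  ABI_BASE_LP64F = 1,
  ABI_BASE_LP64S = 2,
  N_ABI_BASE_TYPES = 3
};

int
main ()
{
  int abi_base = ABI_BASE_LP64S;
  printf ("abi_base = %d of %d types\n", abi_base, N_ABI_BASE_TYPES);
  return abi_base == OLD_ABI_BASE_LP64S ? 0 : 1;  /* values are identical */
}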
_service:tar_scm:0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
Deleted
@@ -1,148 +0,0 @@ -From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Rename hard_fp_offset to bytes_above_hard_fp -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Similarly to the previous locals_offset patch, hard_fp_offset -was described as: - - /* Offset from the base of the frame (incomming SP) to the - hard_frame_pointer. This value is always a multiple of - STACK_BOUNDARY. */ - poly_int64 hard_fp_offset; - -which again took an “upside-down” view: higher offsets meant lower -addresses. This patch renames the field to bytes_above_hard_fp instead. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename - to... - (aarch64_frame::bytes_above_hard_fp): ...this. - * config/aarch64/aarch64.cc (aarch64_layout_frame) - (aarch64_expand_prologue): Update accordingly. - (aarch64_initial_elimination_offset): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- - gcc/config/aarch64/aarch64.h | 6 +++--- - 2 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index d4ec352ba98a..3c4052740e7a 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void) - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.hard_fp_offset -+ frame.bytes_above_hard_fp - = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -- && known_eq (frame.hard_fp_offset, const_size)) -+ && known_eq (frame.bytes_above_hard_fp, const_size)) - { - /* Simple, small frame with no data below the saved registers. - -@@ -8401,8 +8401,8 @@ aarch64_layout_frame (void) - case that it hardly seems worth the effort though. 
*/ - && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca -- && frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset)) -+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset)) - { - /* Frame with small area below the saved registers: - -@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void) - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.hard_fp_offset -+ frame.initial_adjust = (frame.bytes_above_hard_fp - + frame.below_hard_fp_saved_regs_size); - frame.final_adjust = frame.bytes_below_saved_regs; - } -- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset) -+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset) - { - /* Frame with large area below the saved registers, or with SVE saves, - but with a small area above: -@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.callee_adjust = const_fp_offset; -+ frame.callee_adjust = const_above_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = frame.hard_fp_offset; -+ frame.initial_adjust = frame.bytes_above_hard_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void) - { - /* The offset of the frame chain record (if any) from the current SP. */ - poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -+ - frame.bytes_above_hard_fp); - gcc_assert (known_ge (chain_offset, 0)); - - if (callee_adjust == 0) -@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) - if (to == HARD_FRAME_POINTER_REGNUM) - { - if (from == ARG_POINTER_REGNUM) -- return frame.hard_fp_offset; -+ return frame.bytes_above_hard_fp; - - if (from == FRAME_POINTER_REGNUM) -- return frame.hard_fp_offset - frame.bytes_above_locals; -+ return frame.bytes_above_hard_fp - frame.bytes_above_locals; - } - - if (to == STACK_POINTER_REGNUM) -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index bf46e6124aa9..dd1f403f9393 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_locals; - -- /* Offset from the base of the frame (incomming SP) to the -- hard_frame_pointer. This value is always a multiple of -+ /* The number of bytes between the hard_frame_pointer and the top of -+ the frame (the incomming SP). This value is always a multiple of - STACK_BOUNDARY. */ -- poly_int64 hard_fp_offset; -+ poly_int64 bytes_above_hard_fp; - - /* The size of the frame. This value is the offset from base of the - frame (incomming SP) to the stack_pointer. This value is always --- -2.43.5 -
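The rename in the deleted patch above is purely directional: the quantity is unchanged, only its description flips from an offset measured downward from the incoming SP to a byte count read upward from the hard frame pointer. A tiny sketch with invented numbers; the real fields are poly_int64 members of struct aarch64_frame:

#include <cstdio>

int
main ()
{
  long frame_size = 96;           /* incoming SP down to the final SP  */
  long bytes_below_hard_fp = 32;  /* outgoing args plus below-FP saves */

  /* Old reading: "offset from the base of the frame (incoming SP) to
     the hard_frame_pointer"; a larger offset means a lower address.  */
  long hard_fp_offset = frame_size - bytes_below_hard_fp;

  /* New reading says the same thing right side up: the number of bytes
     between the hard frame pointer and the top of the frame.  */
  long bytes_above_hard_fp = hard_fp_offset;

  printf ("bytes_above_hard_fp = %ld\n", bytes_above_hard_fp);
  return 0;
}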
_service:tar_scm:0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
Added
@@ -0,0 +1,108 @@ +From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Mon, 26 Sep 2022 10:10:25 +0100 +Subject: PATCH 007/157 BackportSME aarch64: Add -march support for + Armv9.1-A, Armv9.2-A, Armv9.3-A + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3 + +This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject +through -march. These are already supported in binutils. + +Bootstrapped and tested on aarch64-none-linux-gnu. + +gcc/ChangeLog: + + * config/aarch64/aarch64-arches.def (armv9.1-a): Define. + (armv9.2-a): Likewise. + (armv9.3-a): Likewise. + * config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise. + (AARCH64_FL_V9_2): Likewise. + (AARCH64_FL_V9_3): Likewise. + (AARCH64_FL_FOR_ARCH9_1): Likewise. + (AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3): Likewise. + (AARCH64_ISA_V9_1): Likewise. + (AARCH64_ISA_V9_2): Likewise. + (AARCH64_ISA_V9_3): Likewise. + * doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a, + armv9.3-a values to -march. +--- + gcc/config/aarch64/aarch64-arches.def | 3 +++ + gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++ + gcc/doc/invoke.texi | 3 +++ + 3 files changed, 24 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 3c2b16588..6150448dc 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8 + AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) + AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) + AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7d73689e4..42aae37ef 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -239,6 +239,15 @@ + /* Armv8.8-a architecture extensions. */ + #define AARCH64_FL_V8_8 (1ULL << 45) + ++/* Armv9.1-A. */ ++#define AARCH64_FL_V9_1 (1ULL << 46) ++ ++/* Armv9.2-A. */ ++#define AARCH64_FL_V9_2 (1ULL << 47) ++ ++/* Armv9.3-A. */ ++#define AARCH64_FL_V9_3 (1ULL << 48) ++ + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +@@ -274,6 +283,12 @@ + #define AARCH64_FL_FOR_ARCH9 \ + (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ + | AARCH64_FL_F16) ++#define AARCH64_FL_FOR_ARCH9_1 \ ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++#define AARCH64_FL_FOR_ARCH9_2 \ ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++#define AARCH64_FL_FOR_ARCH9_3 \ ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) + + /* Macros to test ISA flags. 
*/ + +@@ -314,6 +329,9 @@ + #define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) + #define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 17d9e4126..53709b246 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19176,6 +19176,9 @@ and the features that they enable by default: + @item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64} + @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} + @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} ++@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm} ++@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64} ++@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops} + @item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r} + @end multitable + +-- +2.33.0 +
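The AARCH64_FL_FOR_ARCH* scheme above is cumulative: each architecture revision's flag set is the previous revision's set plus that revision's marker bit, and the AARCH64_ISA_* macros simply mask the active flags. A reduced standalone sketch; the bit positions are invented, only the pattern is taken from the patch:

#include <cstdint>
#include <cstdio>

#define FL_SVE2  (1ULL << 0)
#define FL_V9    (1ULL << 1)
#define FL_V9_1  (1ULL << 2)

#define FL_FOR_ARCH9    (FL_SVE2 | FL_V9)
/* Each revision = the previous revision's flags + its own marker bit.  */
#define FL_FOR_ARCH9_1  (FL_FOR_ARCH9 | FL_V9_1)

static uint64_t isa_flags = FL_FOR_ARCH9_1;  /* as if -march=armv9.1-a */

/* Mirrors the shape of the AARCH64_ISA_V9_1 test macro added above.  */
#define ISA_V9_1 (isa_flags & FL_V9_1)

int
main ()
{
  printf ("v9.1 enabled: %s\n", ISA_V9_1 ? "yes" : "no");
  return 0;
}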
_service:tar_scm:0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch
Added
@@ -0,0 +1,124 @@ +From dc572aebb3a2c9062014ec50764bbc702dbb8a20 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:10 +0800 +Subject: PATCH 106/188 LoongArch: Simplify -mexplicit-reloc definitions + +Since we do not need printing or manual parsing of this option, +(whether in the driver or for target attributes to be supported later) +it can be handled in the .opt file framework. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Remove explicit-reloc + argument string definitions. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-explicit-relocs + as aliases to -mexplicit-relocs={always,none} + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.cc: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 6 ------ + gcc/config/loongarch/genopts/loongarch.opt.in | 8 ++++---- + gcc/config/loongarch/loongarch-str.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 12 ------------ + gcc/config/loongarch/loongarch.opt | 2 +- + 5 files changed, 5 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index ce70b8b9c..99fd4e7cd 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -64,9 +64,3 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme +- +-# -mexplicit-relocs +-OPTSTR_EXPLICIT_RELOCS explicit-relocs +-STR_EXPLICIT_RELOCS_AUTO auto +-STR_EXPLICIT_RELOCS_NONE none +-STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 851d8d1f3..f2055b55e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,20 +181,20 @@ Name(explicit_relocs) Type(int) + The code model option names for -mexplicit-relocs: + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) + + mexplicit-relocs= + Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 2251df38b..cacae38c0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -63,11 +63,6 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" +-#define STR_EXPLICIT_RELOCS_AUTO "auto" +-#define STR_EXPLICIT_RELOCS_NONE "none" +-#define STR_EXPLICIT_RELOCS_ALWAYS "always" +- + #define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b0bb67d60..8cd703caa 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7518,18 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs != M_OPT_UNSET +- && la_opt_explicit_relocs_backward != M_OPT_UNSET) +- error ("do not use %qs (with %qs) and %qs (without %qs) together", +- "-mexplicit-relocs=", "=", +- la_opt_explicit_relocs_backward ? "-mexplicit-relocs" +- : "-mno-explicit-relocs", "="); +- +- if (la_opt_explicit_relocs_backward != M_OPT_UNSET) +- la_opt_explicit_relocs = (la_opt_explicit_relocs_backward +- ? EXPLICIT_RELOCS_ALWAYS +- : EXPLICIT_RELOCS_NONE); +- + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS + ? (loongarch_mrelax +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index df7314973..d6e337ac2 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -202,7 +202,7 @@ Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) I + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-- +2.43.0 +
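The one-line Alias() change above moves the backward-compatibility mapping out of loongarch.cc and into the option framework: the positive boolean form is rewritten to the first alias value and the negative form to the second before any target code runs. A hypothetical standalone re-implementation of just that mapping:

#include <cstring>
#include <cstdio>

/* Stand-in for "Alias(mexplicit-relocs=, always, none)".  */
static const char *
canonicalize (const char *arg)
{
  if (!strcmp (arg, "-mexplicit-relocs"))
    return "-mexplicit-relocs=always";
  if (!strcmp (arg, "-mno-explicit-relocs"))
    return "-mexplicit-relocs=none";
  return arg;  /* already canonical */
}

int
main ()
{
  /* Prints -mexplicit-relocs=none; the manual reconciliation deleted
     from loongarch_option_override_internal is therefore dead code.  */
  printf ("%s\n", canonicalize ("-mno-explicit-relocs"));
  return 0;
}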
_service:tar_scm:0106-aarch64-Tweak-frame-size-comment.patch
Deleted
@@ -1,35 +0,0 @@ -From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Tweak frame_size comment -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -This patch fixes another case in which a value was described with -an “upside-down” view. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. ---- - gcc/config/aarch64/aarch64.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index dd1f403f9393..700524ae22bf 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_hard_fp; - -- /* The size of the frame. This value is the offset from base of the -- frame (incomming SP) to the stack_pointer. This value is always -+ /* The size of the frame, i.e. the number of bytes between the bottom -+ of the outgoing arguments and the incoming SP. This value is always - a multiple of STACK_BOUNDARY. */ - poly_int64 frame_size; - --- -2.43.5 -
_service:tar_scm:0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
Added
@@ -0,0 +1,112 @@ +From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Mon, 19 Feb 2024 19:27:29 +0800 +Subject: PATCH 008/157 BackportSME Revert "aarch64: Define + __ARM_FEATURE_RCPC" + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d + +Revert this commit to solve conflicts with later patches, +and will apply it later. +--- + gcc/config/aarch64/aarch64-c.cc | 1 - + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 4 +--- + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 ------------------- + 4 files changed, 6 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 90d45e45d..3d2fb5ec2 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); +- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..842d64932 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_ + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 42aae37ef..7c090c8f2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -262,8 +262,7 @@ + #define AARCH64_FL_FOR_ARCH8_2 \ + (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \ +- | AARCH64_FL_RCPC) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ + (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) +@@ -314,7 +313,6 @@ + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) +-#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index 307fa3d67..bfb044f5d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,26 +248,6 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + +-#pragma GCC target ("arch=armv8.2-a") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" 
+-#endif +- +-#pragma GCC target ("arch=armv8.2-a+rcpc") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- +-#pragma GCC target ("+norcpc") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" +-#endif +- +-#pragma GCC target ("arch=armv8.3-a") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- + int + foo (int a) + { +-- +2.33.0 +
_service:tar_scm:0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch
Added
@@ -0,0 +1,35 @@ +From f90e31b6dc8c99f6670dee9a120c5dd9fa9a18d9 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 10 Jan 2024 15:25:21 +0800 +Subject: PATCH 107/188 LoongArch: testsuite: Add loongarch support to + slp-21.c. + +The function of this test is to check that the compiler supports vectorization +using SLP and vec_{load/store/*}_lanes. However, vec_{load/store/*}_lanes are +not supported on LoongArch, such as the corresponding "st4/ld4" directives on +aarch64. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/slp-21.c: Add loongarch. +--- + gcc/testsuite/gcc.dg/vect/slp-21.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c +index 4b83adb98..3b7e92fe8 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-21.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-21.c +@@ -210,7 +210,7 @@ int main (void) + + Not all vect_perm targets support that, and it's a bit too specific to have + its own effective-target selector, so we just test targets directly. */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* } } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + +-- +2.43.0 +
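For context, slp-21.c stresses groups of interleaved stores; targets with st4-style vec_store_lanes instructions vectorize more of these groups, which is why the expected "vectorizing stmts using SLP" count in the dg-final lines differs per target. A standalone sketch of the access pattern involved, not the actual testcase source:

#include <cstdio>

/* Four interleaved stores per iteration: a stride-4 store group.  */
void
interleave (int *out, const int *a, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[4 * i + 0] = a[i] + 1;
      out[4 * i + 1] = a[i] + 2;
      out[4 * i + 2] = a[i] + 3;
      out[4 * i + 3] = a[i] + 4;
    }
}

int
main ()
{
  int a[2] = { 10, 20 }, out[8];
  interleave (out, a, 2);
  printf ("%d %d\n", out[0], out[7]);  /* 11 24 */
  return 0;
}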
_service:tar_scm:0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
Deleted
@@ -1,195 +0,0 @@ -From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: PATCH aarch64: Measure reg_offset from the bottom of the frame - -reg_offset was measured from the bottom of the saved register area. -This made perfect sense with the original layout, since the bottom -of the saved register area was also the hard frame pointer address. -It became slightly less obvious with SVE, since we save SVE -registers below the hard frame pointer, but it still made sense. - -However, if we want to allow different frame layouts, it's more -convenient and obvious to measure reg_offset from the bottom of -the frame. After previous patches, it's also a slight simplification -in its own right. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame): Add comment above - reg_offset. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets - from the bottom of the frame, rather than the bottom of the saved - register area. Measure reg_offset from the bottom of the frame - rather than the bottom of the saved register area. - (aarch64_save_callee_saves): Update accordingly. - (aarch64_restore_callee_saves): Likewise. - (aarch64_get_separate_components): Likewise. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 3 ++ - 2 files changed, 27 insertions(+), 29 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3c4052740e7a..97dd077844b4 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void) - static void - aarch64_layout_frame (void) - { -- poly_int64 offset = 0; - int regno, last_fp_reg = INVALID_REGNUM; - machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); -@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - -- frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ poly_int64 offset = crtl->outgoing_args_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.bytes_below_saved_regs = offset; - - /* Now assign stack slots for the registers. 
Start with the predicate - registers, since predicate LDR and STR have a relatively small -@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void) - offset += BYTES_PER_SVE_PRED; - } - -- if (maybe_ne (offset, 0)) -+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; -+ if (maybe_ne (saved_prs_size, 0)) - { - /* If we have any vector registers to save above the predicate registers, - the offset of the vector register save slots need to be a multiple -@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - else - { -- if (known_le (offset, vector_save_size)) -- offset = vector_save_size; -- else if (known_le (offset, vector_save_size * 2)) -- offset = vector_save_size * 2; -+ if (known_le (saved_prs_size, vector_save_size)) -+ offset = frame.bytes_below_saved_regs + vector_save_size; -+ else if (known_le (saved_prs_size, vector_save_size * 2)) -+ offset = frame.bytes_below_saved_regs + vector_save_size * 2; - else - gcc_unreachable (); - } -@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- bool saves_below_hard_fp_p = maybe_ne (offset, 0); -- frame.below_hard_fp_saved_regs_size = offset; -- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; -+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p -+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ frame.bytes_below_hard_fp = offset; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset; -+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; -+ poly_int64 varargs_and_saved_regs_size -+ = frame.saved_regs_size + frame.saved_varargs_size; - - poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size -@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void) - it as a stack probe for -fstack-clash-protection. */ - if (flag_stack_clash_protection - && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) -- && known_eq (offset, 0)) -+ && known_eq (offset, frame.bytes_below_saved_regs)) - continue; - - /* Get the offset relative to the register we'll use. 
*/ - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - rtx reg = gen_rtx_REG (mode, regno); - poly_int64 offset = frame.reg_offsetregno; - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9248,9 +9244,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - /* REGNO2 can be saved/restored in a pair with REGNO. */ - rtx reg2 = gen_rtx_REG (mode, regno2); - if (frame_pointer_needed) -- offset2 -= frame.below_hard_fp_saved_regs_size; -- else -- offset2 += frame.bytes_below_saved_regs; -+ offset2 -= frame.bytes_below_hard_fp; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p - && known_eq (frame.below_hard_fp_saved_regs_size, 0)) - { -- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; -+ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM -+ - frame.bytes_below_saved_regs); - if (known_ge (lr_offset, 0)) - min_probe_threshold -= lr_offset.to_constant (); - else -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 700524ae22bf..b61358370732 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune; - #ifdef HAVE_POLY_INT_H - struct GTY (()) aarch64_frame - { -+ /* The offset from the bottom of the static frame (the bottom of the -+ outgoing arguments) of each register save slot, or -2 if no save is -+ needed. */ - poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; - - /* The number of extra stack bytes taken up by register varargs. --- -2.43.5 -
_service:tar_scm:0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
Added
@@ -0,0 +1,39 @@ +From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Tue, 20 Feb 2024 10:13:06 +0800 +Subject: PATCH 009/157 BackportSME Revert "Ampere-1 and Ampere-1A core + definition in aarch64-cores.def" + +Revert it to solve conflicts with later patches, and will apply it +later. It's introduced by commit 3668a59ae22a and e9f0d974600e. +--- + gcc/config/aarch64/aarch64-cores.def | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 842d64932..0402bfb74 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH + AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1) +-AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ + AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) +@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG +- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + + AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-- +2.33.0 +
_service:tar_scm:0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch
Added
@@ -0,0 +1,228 @@ +From 9b19eb071fe3826aa61567b927fc95a37f6560f7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 8 Dec 2023 10:16:48 +0800 +Subject: PATCH 108/188 LoongArch: Optimized some of the symbolic expansion + instructions generated during bitwise operations. + +There are two mode iterators defined in the loongarch.md: + (define_mode_iterator GPR SI (DI "TARGET_64BIT")) + and + (define_mode_iterator X (SI "!TARGET_64BIT") (DI "TARGET_64BIT")) +Replace the mode in the bit arithmetic from GPR to X. + +Since the bitwise operation instruction does not distinguish between 64-bit, +32-bit, etc., it is necessary to perform symbolic expansion if the bitwise +operation is less than 64 bits. +The original definition would have generated a lot of redundant symbolic +extension instructions. This problem is optimized with reference to the +implementation of RISCV. + +Add this patch spec2017 500.perlbench performance improvement by 1.8% + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (one_cmpl<mode>2): Replace GPR with X. + (*nor<mode>3): Likewise. + (nor<mode>3): Likewise. + (*negsi2_extended): New template. + (*<optab>si3_internal): Likewise. + (*one_cmplsi2_internal): Likewise. + (*norsi3_internal): Likewise. + (*<optab>nsi_internal): Likewise. + (bytepick_w_<bytepick_imm>_extend): Modify this template according to the + modified bit operation to make the optimization work. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-bitwise.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../loongarch/sign-extend-bitwise.c | 21 +++++ + 2 files changed, 90 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23653a2b0..6ebf33cbe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -736,7 +736,7 @@ + + (define_insn "sub<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) + "" + "sub.<d>\t%0,%z1,%2" +@@ -1412,13 +1412,13 @@ + (set_attr "alu_type" "sub") + (set_attr "mode" "<MODE>")) + +-(define_insn "one_cmpl<mode>2" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r"))) +- "" +- "nor\t%0,%.,%1" +- (set_attr "alu_type" "not") +- (set_attr "mode" "<MODE>")) ++(define_insn "*negsi2_extended" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r")))) ++ "TARGET_64BIT" ++ "sub.w\t%0,%.,%1" ++ (set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")) + + (define_insn "neg<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1438,14 +1438,39 @@ + ;; + + (define_insn "<optab><mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r,r") +- (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K"))) ++ (set (match_operand:X 0 "register_operand" "=r,r") ++ (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") ++ (match_operand:X 2 "uns_arith_operand" "r,K"))) + "" + "<insn>%i2\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*<optab>si3_internal" ++ (set (match_operand:SI 0 "register_operand" "=r,r") ++ (any_bitwise:SI 
(match_operand:SI 1 "register_operand" "%r,r") ++ (match_operand:SI 2 "uns_arith_operand" " r,K"))) ++ "TARGET_64BIT" ++ "<insn>%i2\t%0,%1,%2" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ ++(define_insn "one_cmpl<mode>2" ++ (set (match_operand:X 0 "register_operand" "=r") ++ (not:X (match_operand:X 1 "register_operand" "r"))) ++ "" ++ "nor\t%0,%.,%1" ++ (set_attr "alu_type" "not") ++ (set_attr "mode" "<MODE>")) ++ ++(define_insn "*one_cmplsi2_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (not:SI (match_operand:SI 1 "register_operand" " r"))) ++ "TARGET_64BIT" ++ "nor\t%0,%.,%1" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ + (define_insn "and<mode>3_extended" + (set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") +@@ -1561,25 +1586,43 @@ + (set_attr "type" "logical") + (set_attr "mode" "HI")) + +-(define_insn "*nor<mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) +- (not:GPR (match_operand:GPR 2 "register_operand" "r")))) ++(define_insn "nor<mode>3" ++ (set (match_operand:X 0 "register_operand" "=r") ++ (and:X (not:X (match_operand:X 1 "register_operand" "%r")) ++ (not:X (match_operand:X 2 "register_operand" "r")))) + "" + "nor\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*norsi3_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "%r")) ++ (not:SI (match_operand:SI 2 "register_operand" "r")))) ++ "TARGET_64BIT" ++ "nor\t%0,%1,%2" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ + (define_insn "<optab>n<mode>" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (neg_bitwise:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r"))) ++ (set (match_operand:X 0 "register_operand" "=r") ++ (neg_bitwise:X ++ (not:X (match_operand:X 1 "register_operand" "r")) ++ (match_operand:X 2 "register_operand" "r"))) + "" + "<insn>n\t%0,%2,%1" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*<optab>nsi_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (neg_bitwise:SI ++ (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r"))) ++ "TARGET_64BIT" ++ "<insn>n\t%0,%2,%1" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) +  + ;; + ;; .................... +@@ -3167,7 +3210,6 @@ + (label_ref (match_operand 1)) + (pc)))) + +- +  + ;; + ;; .................... +@@ -3967,10 +4009,13 @@ + (define_insn "bytepick_w_<bytepick_imm>_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int <bytepick_w_lshiftrt_amount>)) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount))))) ++ (subreg:SI ++ (ior:DI (subreg:DI (lshiftrt ++ (match_operand:SI 1 "register_operand" "r") ++ (const_int <bytepick_w_lshiftrt_amount>)) 0) ++ (subreg:DI (ashift ++ (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount)) 0)) 0))) + "TARGET_64BIT" + "bytepick.w\t%0,%1,%2,<bytepick_imm>" + (set_attr "mode" "SI")) +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c
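The redundancy removed above rests on one identity: bitwise operations commute with sign extension, so when 32-bit values are already held sign-extended in 64-bit registers (the convention the patch relies on), performing the operation at 64 bits yields the same register contents as the 32-bit operation followed by an explicit extension. A standalone check of that identity for NOR, with invented inputs:

#include <cstdint>
#include <cstdio>

int
main ()
{
  int32_t a = -5, b = 1234;
  int64_t wa = a, wb = b;  /* sign-extended "register" copies */

  /* nor.w result followed by an explicit sign extension...  */
  int64_t narrow = (int64_t) (int32_t) ~(a | b);
  /* ...versus a single 64-bit nor on the extended inputs.  */
  int64_t wide = ~(wa | wb);

  printf ("%s\n", narrow == wide ? "identical" : "differ");
  return narrow == wide ? 0 : 1;
}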
_service:tar_scm:0108-aarch64-Simplify-top-of-frame-allocation.patch
Deleted
@@ -1,55 +0,0 @@ -From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: PATCH aarch64: Simplify top of frame allocation - -After previous patches, it no longer really makes sense to allocate -the top of the frame in terms of varargs_and_saved_regs_size and -saved_regs_and_above. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify - the allocation of the top of the frame. ---- - gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- - 1 file changed, 8 insertions(+), 15 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 97dd077844b4..81935852d5b2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void) - - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size -- = frame.saved_regs_size + frame.saved_varargs_size; -- -- poly_int64 saved_regs_and_above -- = aligned_upper_bound (varargs_and_saved_regs_size -- + get_frame_size (), -- STACK_BOUNDARY / BITS_PER_UNIT); -- -- frame.bytes_above_hard_fp -- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ auto top_of_locals = offset; - -- /* Both these values are already aligned. */ -- gcc_assert (multiple_p (frame.bytes_below_saved_regs, -- STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; -+ offset += frame.saved_varargs_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.frame_size = offset; - -- frame.bytes_above_locals = frame.saved_varargs_size; -+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; - frame.final_adjust = 0; --- -2.43.5 -
_service:tar_scm:0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
Added
@@ -0,0 +1,157 @@ +From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:50 +0100 +Subject: PATCH 010/157 BackportSME aarch64: Rename AARCH64_ISA + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9 + +All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R +are for the A profile: they cause __ARM_ARCH_PROFILE to be set to +'A' and they are associated with architecture names like armv8.4-a. + +It's convenient for later patches if we make this explicit +by adding an "A" to the name. Also, rather than add an underscore +(as for V8_R) it's more convenient to add the profile directly +to the number, like we already do in the ARCH_IDENT field of the +aarch64-arches.def entries. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3) + (AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6) + (AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2) + (AARCH64_ISA_V9_3): Add "A" to the end of the name. + (AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R. + (TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update + accordingly. + * common/config/aarch64/aarch64-common.cc + (aarch64_get_extension_string_for_isa_flags): Likewise. + * config/aarch64/aarch64-c.cc + (aarch64_define_unconditional_macros): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-c.cc | 4 +-- + gcc/config/aarch64/aarch64.h | 28 ++++++++++----------- + 3 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 85ce8133b..3dc020f0c 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, + + Note that assemblers with Armv8-R AArch64 support should not have this + issue, so we don't need this fix when targeting Armv8-R. */ +- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R) ++ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) + isa_flag_bits |= AARCH64_ISA_CRC; + + /* Pass Two: +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 3d2fb5ec2..18c9b975b 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define ("__ARM_ARCH_8A"); + + builtin_define_with_int_value ("__ARM_ARCH_PROFILE", +- AARCH64_ISA_V8_R ? 'R' : 'A'); ++ AARCH64_ISA_V8R ? 'R' : 'A'); + builtin_define ("__ARM_FEATURE_CLZ"); + builtin_define ("__ARM_FEATURE_IDIV"); + builtin_define ("__ARM_FEATURE_UNALIGNED"); +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ? 
1 : 4); +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7c090c8f2..356a263b2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) + #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) + #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) + #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) + #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +-#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) ++#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) +-#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) +-#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) +-#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) +-#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) ++#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +@@ -383,16 +383,16 @@ + #define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) + + /* ARMv8.3-A features. 
*/ +-#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) ++#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + + /* Javascript conversion instruction from Armv8.3-a. */ +-#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) ++#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A) + + /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ + #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) + + /* Floating-point rounding instructions from Armv8.5-a. */ +-#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) ++#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT) + + /* TME instructions are enabled. */ + #define TARGET_TME (AARCH64_ISA_TME) +@@ -401,7 +401,7 @@ + #define TARGET_RNG (AARCH64_ISA_RNG) + + /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */ +-#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG) ++#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG) + + /* I8MM instructions are enabled through +i8mm. */ + #define TARGET_I8MM (AARCH64_ISA_I8MM) +-- +2.33.0 +
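The renamed AARCH64_ISA_V8R macro is what feeds the __ARM_ARCH_PROFILE predefine mentioned in the commit message: 'R' for Armv8-R targets, 'A' otherwise. A small portable probe; it compiles anywhere and only prints a profile when the compiler defines the macro:

#include <cstdio>

int
main ()
{
#ifdef __ARM_ARCH_PROFILE
  printf ("profile: %c\n", (char) __ARM_ARCH_PROFILE);  /* 'A' or 'R' */
#else
  printf ("__ARM_ARCH_PROFILE is not defined on this target\n");
#endif
  return 0;
}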
_service:tar_scm:0109-LoongArch-Implement-option-save-restore.patch
Added
@@ -0,0 +1,467 @@ +From 146c85fa8b32d88acacf8645096d004e0c6f2f9c Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Thu, 11 Jan 2024 09:07:10 +0800 +Subject: PATCH 109/188 LoongArch: Implement option save/restore + +LTO option streaming and target attributes both require per-function +target configuration, which is achieved via option save/restore. + +We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target +context in addition to other automatically maintained option states +(via the "Save" option property in the .opt files). + +Tested on loongarch64-linux-gnu without regression. + + PR target/113233 + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark options with + the "Save" property. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-opts.cc: Refresh -mcmodel= state + according to la_target. + * config/loongarch/loongarch.cc: Implement TARGET_OPTION_{SAVE, + RESTORE} for the la_target structure; Rename option conditions + to have the same "la_" prefix. + * config/loongarch/loongarch.h: Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 38 ++++----- + gcc/config/loongarch/loongarch-opts.cc | 7 ++ + gcc/config/loongarch/loongarch.cc | 80 +++++++++++++++---- + gcc/config/loongarch/loongarch.h | 2 +- + gcc/config/loongarch/loongarch.opt | 38 ++++----- + 5 files changed, 111 insertions(+), 54 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f2055b55e..4d6b1902d 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -50,7 +50,7 @@ EnumValue + Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) + + m@@OPTSTR_ISA_EXT_FPU@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. + + m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ +@@ -82,7 +82,7 @@ EnumValue + Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) + + m@@OPTSTR_ISA_EXT_SIMD@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. + + m@@STR_ISA_EXT_LSX@@ +@@ -114,11 +114,11 @@ EnumValue + Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. + + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -149,31 +149,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. 
+ + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. + + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. + + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -198,11 +198,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. +@@ -229,29 +229,29 @@ EnumValue + Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. 
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index cf4c7bc93..a2b069d83 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -785,8 +785,15 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + opts->x_la_opt_cpu_arch = target->cpu_arch; + opts->x_la_opt_cpu_tune = target->cpu_tune; + ++ /* status of -mcmodel */ ++ opts->x_la_opt_cmodel = target->cmodel; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; ++ ++ /* status of -msimd */ + opts->x_la_opt_simd = target->isa.simd; ++ ++ /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8cd703caa..533bae5b2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4079,10 +4079,10 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + + /* Use this simple hardware resource model that how many non vld/vst + vector instructions can be issued per cycle. */ +- unsigned int issue_info = loongarch_vect_issue_info; ++ unsigned int issue_info = la_vect_issue_info; + unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1; + unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst); +- uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf); ++ uf = MIN ((unsigned int) la_vect_unroll_limit, uf); + + return 1 << ceil_log2 (uf); + } +@@ -5540,7 +5540,7 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align) + return false;
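The option save/restore machinery above is what makes per-function target state possible: both LTO option streaming and target attributes need the la_target context to be snapshotted and switched around each function. A hedged C-level sketch of the kind of usage this enables; the attribute string is illustrative, not a spelling guaranteed by this patch:

/* Functions may carry their own target configuration once
   TARGET_OPTION_{SAVE,RESTORE} switch la_target around them.  */
__attribute__ ((target ("cmodel=normal")))   /* illustrative attribute */
long f (int x) { return x + 1; }

long g (int x) { return x - 1; }             /* keeps the command-line model */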
_service:tar_scm:0109-aarch64-Minor-initial-adjustment-tweak.patch
Deleted
@@ -1,38 +0,0 @@ -From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:54 +0100 -Subject: PATCH aarch64: Minor initial adjustment tweak - -This patch just changes a calculation of initial_adjust -to one that makes it slightly more obvious that the total -adjustment is frame.frame_size. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak - calculation of initial_adjust for frames in which all saves - are SVE saves. ---- - gcc/config/aarch64/aarch64.cc | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 81935852d5b2..4d9fcf3d1623 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void) - { - /* Frame in which all saves are SVE saves: - -- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size -+ sub sp, sp, frame_size - bytes_below_saved_regs - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.bytes_above_hard_fp -- + frame.below_hard_fp_saved_regs_size); -+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) --- -2.43.5 -
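The tweak in the deleted patch above rests on a simple identity: however the split is expressed, the two stack adjustments must add up to the whole frame. A small self-checking sketch with made-up sizes:

#include <assert.h>

int main (void)
{
  long frame_size = 208;                /* hypothetical frame */
  long bytes_below_saved_regs = 64;     /* outgoing-argument area */

  long initial_adjust = frame_size - bytes_below_saved_regs;  /* new form */
  long final_adjust = bytes_below_saved_regs;

  assert (initial_adjust + final_adjust == frame_size);  /* total is frame_size */
  return 0;
}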
_service:tar_scm:0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
Added
@@ -0,0 +1,220 @@ +From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:51 +0100 +Subject: PATCH 011/157 BackportSME aarch64: Rename AARCH64_FL + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301 + +Following on from the previous AARCH64_ISA patch, this one adds the +profile name directly to the end of architecture-level AARCH64_FL_* +macros. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2) + (AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6) + (AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1) + (AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name. + (AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R. + (AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly. + (AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise. + (AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise. + (AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise. + (AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise. + (AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A) + (AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise. + (AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise. + (AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise. +--- + gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------ + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 356a263b2..5a91dfdd2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -154,22 +154,22 @@ + /* ARMv8.1-A architecture extensions. */ + #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ + #define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +-#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ ++#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ + /* Armv8-R. */ +-#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */ ++#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ + /* ARMv8.2-A architecture extensions. */ +-#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ ++#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ + #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ + #define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ + /* ARMv8.3-A architecture extensions. */ +-#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */ ++#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ + #define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ + #define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ + /* New flags to split crypto into aes and sha2. */ + #define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ + #define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ + /* ARMv8.4-A architecture extensions. */ +-#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */ ++#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ + #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ + #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ + #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. 
*/ +@@ -179,7 +179,7 @@ + #define AARCH64_FL_PROFILE (1 << 21) + + /* ARMv8.5-A architecture extensions. */ +-#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */ ++#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ + #define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ + #define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging + Extensions. */ +@@ -204,7 +204,7 @@ + #define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ + + /* Armv8.6-A architecture extensions. */ +-#define AARCH64_FL_V8_6 (1ULL << 34) ++#define AARCH64_FL_V8_6A (1ULL << 34) + + /* 8-bit Integer Matrix Multiply (I8MM) extensions. */ + #define AARCH64_FL_I8MM (1ULL << 35) +@@ -225,28 +225,28 @@ + #define AARCH64_FL_PAUTH (1ULL << 40) + + /* Armv9.0-A. */ +-#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */ ++#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ + + /* 64-byte atomic load/store extensions. */ + #define AARCH64_FL_LS64 (1ULL << 42) + + /* Armv8.7-a architecture extensions. */ +-#define AARCH64_FL_V8_7 (1ULL << 43) ++#define AARCH64_FL_V8_7A (1ULL << 43) + + /* Hardware memory operation instructions. */ + #define AARCH64_FL_MOPS (1ULL << 44) + + /* Armv8.8-a architecture extensions. */ +-#define AARCH64_FL_V8_8 (1ULL << 45) ++#define AARCH64_FL_V8_8A (1ULL << 45) + + /* Armv9.1-A. */ +-#define AARCH64_FL_V9_1 (1ULL << 46) ++#define AARCH64_FL_V9_1A (1ULL << 46) + + /* Armv9.2-A. */ +-#define AARCH64_FL_V9_2 (1ULL << 47) ++#define AARCH64_FL_V9_2A (1ULL << 47) + + /* Armv9.3-A. */ +-#define AARCH64_FL_V9_3 (1ULL << 48) ++#define AARCH64_FL_V9_3A (1ULL << 48) + + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +@@ -258,36 +258,36 @@ + #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) + #define AARCH64_FL_FOR_ARCH8_1 \ + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ +- | AARCH64_FL_RDMA | AARCH64_FL_V8_1) ++ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) + #define AARCH64_FL_FOR_ARCH8_2 \ +- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) ++ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ +- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ ++ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) + #define AARCH64_FL_FOR_ARCH8_5 \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) + #define AARCH64_FL_FOR_ARCH8_6 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ + | AARCH64_FL_I8MM | AARCH64_FL_BF16) + #define AARCH64_FL_FOR_ARCH8_7 \ +- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64) ++ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) + #define AARCH64_FL_FOR_ARCH8_8 \ +- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS) ++ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) + + #define AARCH64_FL_FOR_ARCH8_R \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R) ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) + #define AARCH64_FL_FOR_ARCH9 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | 
AARCH64_FL_V9A \ + | AARCH64_FL_F16) + #define AARCH64_FL_FOR_ARCH9_1 \ +- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) + #define AARCH64_FL_FOR_ARCH9_2 \ +- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) + #define AARCH64_FL_FOR_ARCH9_3 \ +- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) + + /* Macros to test ISA flags. */ + +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
_service:tar_scm:0110-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,234 @@ +From 54786cec1f52854a70369a3060ed22b1e070f000 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 11 Jan 2024 19:36:19 +0800 +Subject: PATCH 110/188 LoongArch: Redundant sign extension elimination + optimization. + +We found that the current combine optimization pass in gcc cannot handle +the following redundant sign extension situations: + +(insn 77 76 78 5 (set (reg:SI 143) + (plus:SI (subreg/s/u:SI (reg/v:DI 104 len ) 0) + (const_int 1 0x1))) {addsi3} + (expr_list:REG_DEAD (reg/v:DI 104 len ) + (nil))) +(insn 78 77 82 5 (set (reg/v:DI 104 len ) + (sign_extend:DI (reg:SI 143))) {extendsidi2} + (nil)) + +Because reg:SI 143 is not died or set in insn 78, no replacement merge will +be performed for the insn sequence. We adjusted the add template to eliminate +redundant sign extensions during the expand pass. +Adjusted based on upstream comments: +https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (add<mode>3): Removed. + (*addsi3): New. + (addsi3): Ditto. + (adddi3): Ditto. + (*addsi3_extended): Removed. + (addsi3_extended): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend.c: Moved to... + * gcc.target/loongarch/sign-extend-1.c: ...here. + * gcc.target/loongarch/sign-extend-2.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../{sign-extend.c => sign-extend-1.c} | 0 + .../gcc.target/loongarch/sign-extend-2.c | 59 ++++++++++++ + 3 files changed, 128 insertions(+), 24 deletions(-) + rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6ebf33cbe..4c7e28ace 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -657,42 +657,87 @@ + (set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")) + +-(define_insn_and_split "add<mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r") +- (match_operand:GPR 2 "plus_<mode>_operand" +- "r,I,La,Lb,Lc,Ld,Le"))) ++(define_insn_and_split "*addsi3" ++ (set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" ++ "r,I,La,Lb,Le"))) + "" + "@ +- add.<d>\t%0,%1,%2 +- addi.<d>\t%0,%1,%2 ++ add.w\t%0,%1,%2 ++ addi.w\t%0,%1,%2 + # + * operands2 = GEN_INT (INTVAL (operands2) / 65536); \ + return \"addu16i.d\t%0,%1,%2\"; ++ #" ++ "CONST_INT_P (operands2) && !IMM12_INT (operands2) \ ++ && !ADDU16I_OPERAND (INTVAL (operands2))" ++ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4))) ++ { ++ loongarch_split_plus_constant (&operands2, SImode); ++ } ++ (set_attr "alu_type" "add") ++ (set_attr "mode" "SI") ++ (set_attr "insn_count" "1,1,2,1,2")) ++ ++(define_expand "addsi3" ++ (set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" "r,I,La,Le,Lb"))) ++ "TARGET_64BIT" ++{ ++ if (CONST_INT_P (operands2) && !IMM12_INT (operands2) ++ && ADDU16I_OPERAND (INTVAL (operands2))) ++ { ++ rtx t1 = gen_reg_rtx (DImode); ++ rtx t2 = gen_reg_rtx (DImode); ++ rtx t3 = gen_reg_rtx (DImode); ++ emit_insn (gen_extend_insn (t1, operands1, DImode, 
SImode, 0)); ++ t2 = operands2; ++ emit_insn (gen_adddi3 (t3, t1, t2)); ++ t3 = gen_lowpart (SImode, t3); ++ emit_move_insn (operands0, t3); ++ DONE; ++ } ++ else ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_addsi3_extended (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "adddi3" ++ (set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r") ++ (match_operand:DI 2 "plus_di_operand" ++ "r,I,La,Lb,Lc,Ld"))) ++ "TARGET_64BIT" ++ "@ ++ add.d\t%0,%1,%2 ++ addi.d\t%0,%1,%2 + # ++ * operands2 = GEN_INT (INTVAL (operands2) / 65536); \ ++ return \"addu16i.d\t%0,%1,%2\"; + # + #" +- "CONST_INT_P (operands2) && !IMM12_INT (operands2) \ ++ "&& CONST_INT_P (operands2) && !IMM12_INT (operands2) \ + && !ADDU16I_OPERAND (INTVAL (operands2))" +- (set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) +- (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4))) ++ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4))) + { +- loongarch_split_plus_constant (&operands2, <MODE>mode); ++ loongarch_split_plus_constant (&operands2, DImode); + } + (set_attr "alu_type" "add") +- (set_attr "mode" "<MODE>") +- (set_attr "insn_count" "1,1,2,1,2,2,2") +- (set (attr "enabled") +- (cond +- (match_test "<MODE>mode != DImode && which_alternative == 4") +- (const_string "no") +- (match_test "<MODE>mode != DImode && which_alternative == 5") +- (const_string "no") +- (match_test "<MODE>mode != SImode && which_alternative == 6") +- (const_string "no") +- (const_string "yes")))) +- +-(define_insn_and_split "*addsi3_extended" ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,1,2,1,2,2")) ++ ++(define_insn_and_split "addsi3_extended" + (set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r") +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +similarity index 100% +rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c +rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +new file mode 100644 +index 000000000..a45dde4f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-times "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" 1 } } */ ++ ++#include <stdint.h> ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++void ++bt_skip_func (const uint32_t len_limit, const uint32_t pos, ++ const uint8_t *const cur, uint32_t cur_match, ++ uint32_t *const son, const uint32_t cyclic_pos, ++ const uint32_t cyclic_size) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) ++ { ++ const uint32_t delta = pos - cur_match; ++ uint32_t *pair ++ = son ++ + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) ++ << 1); ++ const uint8_t *pb = cur - delta;
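The insn pair quoted in the commit message corresponds to a common C shape: 32-bit arithmetic on the low half of a 64-bit value whose result is sign-extended back. A sketch of code in that shape, which used to keep the redundant extension on loongarch64:

#include <stdint.h>

int64_t
step (int64_t len)
{
  len = (int32_t) len + 1;   /* plus:SI on the low word (addsi3) */
  return len;                /* sign_extend:DI of the sum (extendsidi2) */
}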
_service:tar_scm:0110-aarch64-Tweak-stack-clash-boundary-condition.patch
Deleted
@@ -1,125 +0,0 @@ -From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:54 +0100 -Subject: PATCH aarch64: Tweak stack clash boundary condition - -The AArch64 ABI says that, when stack clash protection is used, -there can be a maximum of 1KiB of unprobed space at sp on entry -to a function. Therefore, we need to probe when allocating ->= guard_size - 1KiB of data (>= rather than >). This is what -GCC does. - -If an allocation is exactly guard_size bytes, it is enough to allocate -those bytes and probe once at offset 1024. It isn't possible to use a -single probe at any other offset: higher would conmplicate later code, -by leaving more unprobed space than usual, while lower would risk -leaving an entire page unprobed. For simplicity, the code probes all -allocations at offset 1024. - -Some register saves also act as probes. If we need to allocate -more space below the last such register save probe, we need to -probe the allocation if it is > 1KiB. Again, this allocation is -then sometimes (but not always) probed at offset 1024. This sort of -allocation is currently only used for outgoing arguments, which are -rarely this big. - -However, the code also probed if this final outgoing-arguments -allocation was == 1KiB, rather than just > 1KiB. This isn't -necessary, since the register save then probes at offset 1024 -as required. Continuing to probe allocations of exactly 1KiB -would complicate later patches. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): - Don't probe final allocations that are exactly 1KiB in size (after - unprobed space above the final allocation has been deducted). - -gcc/testsuite/ - * gcc.target/aarch64/stack-check-prologue-17.c: New test. ---- - gcc/config/aarch64/aarch64.cc | 4 +- - .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ - 2 files changed, 58 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 4d9fcf3d1623..34c1d8614cd9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - HOST_WIDE_INT guard_size - = 1 << param_stack_clash_protection_guard_size; - HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; -+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; -+ gcc_assert (multiple_p (poly_size, byte_sp_alignment)); - HOST_WIDE_INT min_probe_threshold - = (final_adjustment_p -- ? guard_used_by_caller -+ ? guard_used_by_caller + byte_sp_alignment - : guard_size - guard_used_by_caller); - /* When doing the final adjustment for the outgoing arguments, take into - account any unprobed space there is above the current SP. There are -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -new file mode 100644 -index 000000000000..0d8a25d73a24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -@@ -0,0 +1,55 @@ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void f(int, ...); -+void g(); -+ -+/* -+** test1: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #1024 -+** cbnz w0, .* -+** bl g -+** ... 
-+*/ -+int test1(int z) { -+ __uint128_t x = 0; -+ int y0x400; -+ if (z) -+ { -+ f(0, 0, 0, 0, 0, 0, 0, &y, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); -+ } -+ g(); -+ return 1; -+} -+ -+/* -+** test2: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #1040 -+** str xzr, \sp\ -+** cbnz w0, .* -+** bl g -+** ... -+*/ -+int test2(int z) { -+ __uint128_t x = 0; -+ int y0x400; -+ if (z) -+ { -+ f(0, 0, 0, 0, 0, 0, 0, &y, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x); -+ } -+ g(); -+ return 1; -+} --- -2.43.5 -
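The boundary condition in the deleted patch above is easy to state numerically. With the 4 KiB guard used by the tests (--param stack-clash-protection-guard-size=12) and the 1 KiB AArch64 caller guard, a final outgoing-arguments allocation of exactly 1024 bytes no longer needs its own probe, while anything larger does. A sketch of the threshold arithmetic, with the constants assumed from the patch:

#include <stdbool.h>
#include <stdio.h>

#define GUARD_SIZE           4096   /* 1 << 12 */
#define GUARD_USED_BY_CALLER 1024   /* STACK_CLASH_CALLER_GUARD */
#define BYTE_SP_ALIGNMENT    16     /* STACK_BOUNDARY / BITS_PER_UNIT */

static bool
needs_probe (long alloc, bool final_adjustment_p)
{
  long threshold = final_adjustment_p
    ? GUARD_USED_BY_CALLER + BYTE_SP_ALIGNMENT   /* "> 1 KiB", not ">=" */
    : GUARD_SIZE - GUARD_USED_BY_CALLER;
  return alloc >= threshold;
}

int main (void)
{
  printf ("%d\n", needs_probe (1024, true));   /* 0: the register save probes */
  printf ("%d\n", needs_probe (1040, true));   /* 1: matches test2 above */
  return 0;
}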
_service:tar_scm:0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
Added
@@ -0,0 +1,398 @@ +From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:51 +0100 +Subject: PATCH 012/157 BackportSME aarch64: Rename AARCH64_FL_FOR_ARCH + macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2 + +This patch renames AARCH64_FL_FOR_ARCH* macros to follow the +same V<number><profile> names that we (now) use elsewhere. + +The names are only temporary -- a later patch will move the +information to the .def file instead. However, it helps with +the sequencing to do this first. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to... + (AARCH64_FL_FOR_V8A): ...this. + (AARCH64_FL_FOR_ARCH8_1): Rename to... + (AARCH64_FL_FOR_V8_1A): ...this. + (AARCH64_FL_FOR_ARCH8_2): Rename to... + (AARCH64_FL_FOR_V8_2A): ...this. + (AARCH64_FL_FOR_ARCH8_3): Rename to... + (AARCH64_FL_FOR_V8_3A): ...this. + (AARCH64_FL_FOR_ARCH8_4): Rename to... + (AARCH64_FL_FOR_V8_4A): ...this. + (AARCH64_FL_FOR_ARCH8_5): Rename to... + (AARCH64_FL_FOR_V8_5A): ...this. + (AARCH64_FL_FOR_ARCH8_6): Rename to... + (AARCH64_FL_FOR_V8_6A): ...this. + (AARCH64_FL_FOR_ARCH8_7): Rename to... + (AARCH64_FL_FOR_V8_7A): ...this. + (AARCH64_FL_FOR_ARCH8_8): Rename to... + (AARCH64_FL_FOR_V8_8A): ...this. + (AARCH64_FL_FOR_ARCH8_R): Rename to... + (AARCH64_FL_FOR_V8R): ...this. + (AARCH64_FL_FOR_ARCH9): Rename to... + (AARCH64_FL_FOR_V9A): ...this. + (AARCH64_FL_FOR_ARCH9_1): Rename to... + (AARCH64_FL_FOR_V9_1A): ...this. + (AARCH64_FL_FOR_ARCH9_2): Rename to... + (AARCH64_FL_FOR_V9_2A): ...this. + (AARCH64_FL_FOR_ARCH9_3): Rename to... + (AARCH64_FL_FOR_V9_3A): ...this. + * common/config/aarch64/aarch64-common.cc (all_cores): Update + accordingly. + * config/aarch64/aarch64-arches.def: Likewise. + * config/aarch64/aarch64-cores.def: Likewise. + * config/aarch64/aarch64.cc (all_cores): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-arches.def | 28 ++--- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/aarch64.h | 56 ++++----- + 5 files changed, 109 insertions(+), 109 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 3dc020f0c..0461201a5 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores = + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8}, ++ {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} + }; + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 6150448dc..c6bf7d82c 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + the NAME should be kept as the first argument and FLAGS as the last. 
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) ++AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 0402bfb74..c4038c641 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
_service:tar_scm:0111-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,56 @@ +From 7bb1a356ca9eefab48d64bd3deeaac081c1ae7ea Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 11 Jan 2024 19:36:33 +0800 +Subject: PATCH 111/188 LoongArch: Redundant sign extension elimination + optimization 2. + +Eliminate the redundant sign extension that exists after the conditional +move when the target register is SImode. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_conditional_move): + Adjust. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-2.c: Adjust. +--- + gcc/config/loongarch/loongarch.cc | 6 ++++++ + gcc/testsuite/gcc.target/loongarch/sign-extend-2.c | 5 +++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 533bae5b2..13481130b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5367,6 +5367,12 @@ loongarch_expand_conditional_move (rtx *operands) + rtx temp3 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2))); + temp3 = gen_lowpart (GET_MODE (operands0), temp3); ++ /* Nonzero in a subreg if it was made when accessing an object that ++ was promoted to a wider mode in accord with the PROMOTED_MODE ++ machine description macro. */ ++ SUBREG_PROMOTED_VAR_P (temp3) = 1; ++ /* Sets promoted mode for SUBREG_PROMOTED_VAR_P. */ ++ SUBREG_PROMOTED_SET (temp3, SRP_SIGNED); + loongarch_emit_move (operands0, temp3); + } + else +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +index a45dde4f7..e57a2727d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler-times "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" 1 } } */ ++/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand" } */ ++/* { dg-final { scan-rtl-dump "subreg/s" "expand" } } */ ++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" } } */ + + #include <stdint.h> + #define my_min(x, y) ((x) < (y) ? (x) : (y)) +-- +2.43.0 +
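The conditional-move case handled above has the same C-level shape as the earlier add case: an SImode select whose result flows into a wider context. Marking the lowpart subreg as sign-promoted lets the trailing extension fold away. A sketch of code in that shape:

#include <stdint.h>

int64_t
pick (int32_t x, int32_t y)
{
  int32_t m = x < y ? x : y;   /* expands to the masked IOR sequence */
  return m;                    /* the slli.w sign extension is now redundant */
}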
_service:tar_scm:0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
Deleted
@@ -1,406 +0,0 @@ -From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:55 +0100 -Subject: PATCH aarch64: Put LR save probe in first 16 bytes - --fstack-clash-protection uses the save of LR as a probe for the next -allocation. The next allocation could be: - -* another part of the static frame, e.g. when allocating SVE save slots - or outgoing arguments - -* an alloca in the same function - -* an allocation made by a callee function - -However, when -fomit-frame-pointer is used, the LR save slot is placed -above the other GPR save slots. It could therefore be up to 80 bytes -above the base of the GPR save area (which is also the hard fp address). - -aarch64_allocate_and_probe_stack_space took this into account when -deciding how much subsequent space could be allocated without needing -a probe. However, it interacted badly with: - - /* If doing a small final adjustment, we always probe at offset 0. - This is done to avoid issues when LR is not at position 0 or when - the final adjustment is smaller than the probing offset. */ - else if (final_adjustment_p && rounded_size == 0) - residual_probe_offset = 0; - -which forces any allocation that is smaller than the guard page size -to be probed at offset 0 rather than the usual offset 1024. It was -therefore possible to construct cases in which we had: - -* a probe using LR at SP + 80 bytes (or some other value >= 16) -* an allocation of the guard page size - 16 bytes -* a probe at SP + 0 - -which allocates guard page size + 64 consecutive unprobed bytes. - -This patch requires the LR probe to be in the first 16 bytes of the -save area when stack clash protection is active. Doing it -unconditionally would cause code-quality regressions. - -Putting LR before other registers prevents push/pop allocation -when shadow call stacks are enabled, since LR is restored -separately from the other callee-saved registers. - -The new comment doesn't say that the probe register is required -to be LR, since a later patch removes that restriction. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that - the LR save slot is in the first 16 bytes of the register save area. - Only form STP/LDP push/pop candidates if both registers are valid. - (aarch64_allocate_and_probe_stack_space): Remove workaround for - when LR was not in the first 16 bytes. - -gcc/testsuite/ - * gcc.target/aarch64/stack-check-prologue-18.c: New test. - * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. 
---- - gcc/config/aarch64/aarch64.cc | 72 ++++++------- - .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ - .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++ - .../aarch64/stack-check-prologue-20.c | 3 + - 4 files changed, 233 insertions(+), 42 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 34c1d8614cd9..16433fb70f4f 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void) - bool saves_below_hard_fp_p - = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); - frame.bytes_below_hard_fp = offset; -+ -+ auto allocate_gpr_slot = &(unsigned int regno) -+ { -+ frame.reg_offsetregno = offset; -+ if (frame.wb_push_candidate1 == INVALID_REGNUM) -+ frame.wb_push_candidate1 = regno; -+ else if (frame.wb_push_candidate2 == INVALID_REGNUM) -+ frame.wb_push_candidate2 = regno; -+ offset += UNITS_PER_WORD; -+ }; -+ - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -- frame.reg_offsetR29_REGNUM = offset; -- frame.wb_push_candidate1 = R29_REGNUM; -- frame.reg_offsetR30_REGNUM = offset + UNITS_PER_WORD; -- frame.wb_push_candidate2 = R30_REGNUM; -- offset += 2 * UNITS_PER_WORD; -+ allocate_gpr_slot (R29_REGNUM); -+ allocate_gpr_slot (R30_REGNUM); - } -+ else if (flag_stack_clash_protection -+ && known_eq (frame.reg_offsetR30_REGNUM, SLOT_REQUIRED)) -+ /* Put the LR save slot first, since it makes a good choice of probe -+ for stack clash purposes. The idea is that the link register usually -+ has to be saved before a call anyway, and so we lose little by -+ stopping it from being individually shrink-wrapped. */ -+ allocate_gpr_slot (R30_REGNUM); - - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) -- { -- frame.reg_offsetregno = offset; -- if (frame.wb_push_candidate1 == INVALID_REGNUM) -- frame.wb_push_candidate1 = regno; -- else if (frame.wb_push_candidate2 == INVALID_REGNUM) -- frame.wb_push_candidate2 = regno; -- offset += UNITS_PER_WORD; -- } -+ allocate_gpr_slot (regno); - - poly_int64 max_int_offset = offset; - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void) - max_push_offset to 0, because no registers are popped at this time, - so callee_adjust cannot be adjusted. */ - HOST_WIDE_INT max_push_offset = 0; -- if (frame.wb_pop_candidate2 != INVALID_REGNUM) -- max_push_offset = 512; -- else if (frame.wb_pop_candidate1 != INVALID_REGNUM) -- max_push_offset = 256; -+ if (frame.wb_pop_candidate1 != INVALID_REGNUM) -+ { -+ if (frame.wb_pop_candidate2 != INVALID_REGNUM) -+ max_push_offset = 512; -+ else -+ max_push_offset = 256; -+ } - - HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; -@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - = (final_adjustment_p - ? guard_used_by_caller + byte_sp_alignment - : guard_size - guard_used_by_caller); -- /* When doing the final adjustment for the outgoing arguments, take into -- account any unprobed space there is above the current SP. 
There are -- two cases: -- -- - When saving SVE registers below the hard frame pointer, we force -- the lowest save to take place in the prologue before doing the final -- adjustment (i.e. we don't allow the save to be shrink-wrapped). -- This acts as a probe at SP, so there is no unprobed space. -- -- - When there are no SVE register saves, we use the store of the link -- register as a probe. We can't assume that LR was saved at position 0 -- though, so treat any space below it as unprobed. */ -- if (final_adjustment_p -- && known_eq (frame.below_hard_fp_saved_regs_size, 0)) -- { -- poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM -- - frame.bytes_below_saved_regs); -- if (known_ge (lr_offset, 0)) -- min_probe_threshold -= lr_offset.to_constant (); -- else -- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); -- } -- - poly_int64 frame_size = frame.frame_size; - - /* We should always have a positive probe threshold. */ -@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p && rounded_size != 0) - min_probe_threshold = 0; - /* If doing a small final adjustment, we always probe at offset 0. -- This is done to avoid issues when LR is not at position 0 or when -- the final adjustment is smaller than the probing offset. */ -+ This is done to avoid issues when the final adjustment is smaller -+ than the probing offset. */ - else if (final_adjustment_p && rounded_size == 0) - residual_probe_offset = 0; - -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -new file mode 100644 -index 000000000000..82447d20fff5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -@@ -0,0 +1,100 @@ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void f(int, ...); -+void g(); -+ -+/* -+** test1: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #4064
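The unsafe window described in the deleted patch's commit message is worth working through: a probe via the LR save at up to SP + 80, an allocation of guard_size - 16, then a probe at offset 0 leaves guard_size + 64 consecutive unprobed bytes, more than one whole guard page. The arithmetic, with a 4 KiB guard assumed:

#include <stdio.h>

int main (void)
{
  long guard = 4096;        /* guard page size */
  long lr_probe = 80;       /* LR save up to 80 bytes above the area base */
  long alloc = guard - 16;  /* allocation just under the guard size */

  long unprobed = lr_probe + alloc;   /* gap between consecutive probes */
  printf ("%ld bytes unprobed (= guard + %ld)\n", unprobed, unprobed - guard);
  return 0;
}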
_service:tar_scm:0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
Added
@@ -0,0 +1,315 @@ +From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:52 +0100 +Subject: PATCH 013/157 BackportSME aarch64: Add "V" to + aarch64-arches.def names + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1 + +This patch completes the renaming of architecture-level related +things by adding "V" to the name of the architecture in +aarch64-arches.def. Since the "V" is predictable, we can easily +drop it when we don't need it (as when matching /proc/cpuinfo). + +Having a valid C identifier is necessary for later patches. + +gcc/ + * config/aarch64/aarch64-arches.def: Add a leading "V" to the + ARCH_IDENT fields. + * config/aarch64/aarch64-cores.def: Update accordingly. + * common/config/aarch64/aarch64-common.cc (all_cores): Likewise. + * config/aarch64/aarch64.cc (all_cores): Likewise. + * config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the + leading "V". +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-arches.def | 28 ++--- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 3 +- + 5 files changed, 83 insertions(+), 82 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 0461201a5..6ca89d31f 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores = + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, ++ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} + }; + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index c6bf7d82c..e42202822 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + the NAME should be kept as the first argument and FLAGS as the last. 
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index c4038c641..f4c2f4ea4 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) 
++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
View file
_service:tar_scm:0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch
Added
@@ -0,0 +1,64 @@
+From 191675bdfd4cef0fbcf642f53da82a49bd23a3bf Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 12 Jan 2024 17:06:30 +0800
+Subject: [PATCH 112/188] LoongArch: Assign the '/u' attribute to the mem to
+ which the global offset table belongs.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_split_symbol):
+	Assign the '/u' attribute to the mem.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/loongarch/got-load.C: New test.
+---
+ gcc/config/loongarch/loongarch.cc             |  5 +++++
+ gcc/testsuite/g++.target/loongarch/got-load.C | 19 +++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/got-load.C
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 13481130b..7da00c132 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -3198,6 +3198,11 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+ 	    rtx mem = gen_rtx_MEM (Pmode, low);
+ 	    *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+ 				       UNSPEC_LOAD_FROM_GOT);
++
++	    /* Nonzero in a mem, if the memory is statically allocated and
++	       read-only.  A common example of the later is a shared library's
++	       global offset table.  */
++	    MEM_READONLY_P (mem) = 1;
+ 	  }
+ 
+ 	break;
+diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C
+new file mode 100644
+index 000000000..20924c739
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/got-load.C
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */
++/* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */
++
++#include <bits/stdc++.h>
++
++using namespace std;
++
++int lr[1000005][2];
++
++void
++test(void)
++{
++  int n;
++
++  cin >> n;
++  for (int i = 0; i < n; ++i)
++    cin >> lr[i][0] >> lr[i][1];
++}
+-- 
+2.43.0
+
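A note for readers outside GCC internals: RTL dumps print read-only memory references with a "/u" flag, which is what the scan-rtl-dump pattern above counts. The sketch below, with invented names and no GCC API beyond the concept, models why the flag helps: a load that can never alias a store is loop-invariant and may be hoisted.

    // Sketch only, not GCC internals.
    struct mem_ref { bool readonly; };

    bool may_hoist (const mem_ref &m, bool loop_contains_stores)
    {
      // A read-only mem ("/u") is safe to hoist even when the loop writes
      // memory, because no store in the program can modify it.
      return m.readonly || !loop_contains_stores;
    }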
View file
_service:tar_scm:0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
Deleted
@@ -1,123 +0,0 @@
-From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:55 +0100
-Subject: [PATCH] aarch64: Simplify probe of final frame allocation
-
-Previous patches ensured that the final frame allocation only needs
-a probe when the size is strictly greater than 1KiB.  It's therefore
-safe to use the normal 1024 probe offset in all cases.
-
-The main motivation for doing this is to simplify the code and
-remove the number of special cases.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
-	Always probe the residual allocation at offset 1024, asserting
-	that that is in range.
-
-gcc/testsuite/
-	* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
-	to be at offset 1024 rather than offset 0.
-	* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
-	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc                    | 12 ++++--------
- .../gcc.target/aarch64/stack-check-prologue-17.c |  2 +-
- .../gcc.target/aarch64/stack-check-prologue-18.c |  4 ++--
- .../gcc.target/aarch64/stack-check-prologue-19.c |  4 ++--
- 4 files changed, 9 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 16433fb70f4f..8abf3d7a1e2b 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-      are still safe.  */
-   if (residual)
-     {
---      HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
--+      gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
--+
-       /* If we're doing final adjustments, and we've done any full page
- 	 allocations then any residual needs to be probed.  */
-       if (final_adjustment_p && rounded_size != 0)
- 	min_probe_threshold = 0;
---      /* If doing a small final adjustment, we always probe at offset 0.
---	 This is done to avoid issues when the final adjustment is smaller
---	 than the probing offset.  */
---      else if (final_adjustment_p && rounded_size == 0)
---	residual_probe_offset = 0;
- 
-       aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
-       if (residual >= min_probe_threshold)
-@@ -9543,8 +9539,8 @@
- 		     HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
- 		     "\n", residual);
- 
---	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
---					   residual_probe_offset));
--+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
--+					   guard_used_by_caller));
- 	  emit_insn (gen_blockage ());
- 	}
-     }
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-index 0d8a25d73a24..f0ec1389771d 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -33,7 +33,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-index 82447d20fff5..6383bec5ebcd 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -9,7 +9,7 @@ void g();
- /*
- ** test1:
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #4064
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-@@ -50,7 +50,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-index 73ac3e4e4eb0..562039b5e9b8 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -9,7 +9,7 @@ void g();
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #4064
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-@@ -50,7 +50,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
---
-2.43.5
-
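A sketch of the invariant the deleted patch relied on, with invented names and plain int64_t standing in for GCC's types: once every residual allocation that needs probing is known to exceed the 1 KiB a caller may already have used, a probe at the fixed 1024-byte offset is always inside the new allocation.

    #include <cassert>
    #include <cstdint>

    int64_t residual_probe_offset (int64_t residual, int64_t min_probe_threshold)
    {
      const int64_t guard_used_by_caller = 1024;   // bytes callers may touch
      // Residuals that reach the probe are > 1 KiB, so offset 1024 is in range.
      if (residual >= min_probe_threshold)
        assert (residual >= guard_used_by_caller);
      return guard_used_by_caller;
    }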
View file
_service:tar_scm:0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
Added
@@ -0,0 +1,55 @@
+From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:52 +0100
+Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e
+
+The aarch64-option-extensions.def parsing in config.gcc had
+some code left over from when it tried to parse the whole
+macro definition.  Also, config.gcc now only looks at the
+first fields of the aarch64-arches.def entries.
+
+gcc/
+	* config.gcc: Remove dead aarch64-option-extensions.def code.
+	* config/aarch64/aarch64-arches.def: Update comment.
+---
+ gcc/config.gcc                        | 8 --------
+ gcc/config/aarch64/aarch64-arches.def | 2 +-
+ 2 files changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3be450471..da66603cd 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -4210,14 +4210,6 @@ case "${target}" in
+ 	  options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \
+ 			${srcdir}/config/aarch64/aarch64-option-extensions.def`"
+ 
+-	  # Match one element inside AARCH64_OPT_EXTENSION, we
+-	  # consume anything that's not a ,.
+-	  elem="[ 	]*\([^,]\+\)[ 	]*"
+-
+-	  # Repeat the pattern for the number of entries in the
+-	  # AARCH64_OPT_EXTENSION, currently 6 times.
+-	  sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
+-
+ 	  while [ x"$ext_val" != x ]
+ 	  do
+ 		ext_val=`echo $ext_val | sed -e 's/\+//'`
+diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
+index e42202822..ece96e22a 100644
+--- a/gcc/config/aarch64/aarch64-arches.def
++++ b/gcc/config/aarch64/aarch64-arches.def
+@@ -28,7 +28,7 @@
+    ARCH_REV is an integer specifying the architecture major revision.
+    FLAGS are the flags implied by the architecture.
+    Due to the assumptions about the positions of these fields in config.gcc,
+-   the NAME should be kept as the first argument and FLAGS as the last.  */
++   NAME should be kept as the first argument.  */
+ 
+ AARCH64_ARCH("armv8-a",       generic,       V8A,       8,  AARCH64_FL_FOR_V8A)
+ AARCH64_ARCH("armv8.1-a",     generic,       V8_1A,     8,  AARCH64_FL_FOR_V8_1A)
+-- 
+2.33.0
+
View file
_service:tar_scm:0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch
Added
@@ -0,0 +1,51 @@
+From 1576f83f8cae0ead9de533566ec5f21e7a01f842 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Sat, 13 Jan 2024 15:28:34 +0800
+Subject: [PATCH 113/188] LoongArch: testsuite: Fix fail in gen-vect-{2,25}.c
+ file.
+
+1. Added dg-do compile on LoongArch.
+   When binutils does not support vector instruction sets, an error occurs
+because the assembler does not recognize vector instructions.
+
+2. Added "-mlsx" option for vectorization on LoongArch.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/tree-ssa/gen-vect-2.c: Added detection of compilation
+	behavior and "-mlsx" option on LoongArch.
+	* gcc.dg/tree-ssa/gen-vect-25.c: Ditto.
+---
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c  | 2 ++
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+index 42171a2fb..395d6f7ee 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+ 
+ #include <stdlib.h>
+ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+index 60ec27054..cea7f246a 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+ 
+ #include <stdlib.h>
+ 
+-- 
+2.43.0
+
View file
_service:tar_scm:0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
Deleted
@@ -1,277 +0,0 @@ -From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:56 +0100 -Subject: PATCH aarch64: Explicitly record probe registers in frame info - -The stack frame is currently divided into three areas: - -A: the area above the hard frame pointer -B: the SVE saves below the hard frame pointer -C: the outgoing arguments - -If the stack frame is allocated in one chunk, the allocation needs a -probe if the frame size is >= guard_size - 1KiB. In addition, if the -function is not a leaf function, it must probe an address no more than -1KiB above the outgoing SP. We ensured the second condition by - -(1) using single-chunk allocations for non-leaf functions only if - the link register save slot is within 512 bytes of the bottom - of the frame; and - -(2) using the link register save as a probe (meaning, for instance, - that it can't be individually shrink wrapped) - -If instead the stack is allocated in multiple chunks, then: - -* an allocation involving only the outgoing arguments (C above) requires - a probe if the allocation size is > 1KiB - -* any other allocation requires a probe if the allocation size - is >= guard_size - 1KiB - -* second and subsequent allocations require the previous allocation - to probe at the bottom of the allocated area, regardless of the size - of that previous allocation - -The final point means that, unlike for single allocations, -it can be necessary to have both a non-SVE register probe and -an SVE register probe. For example: - -* allocate A, probe using a non-SVE register save -* allocate B, probe using an SVE register save -* allocate C - -The non-SVE register used in this case was again the link register. -It was previously used even if the link register save slot was some -bytes above the bottom of the non-SVE register saves, but an earlier -patch avoided that by putting the link register save slot first. - -As a belt-and-braces fix, this patch explicitly records which -probe registers we're using and allows the non-SVE probe to be -whichever register comes first (as for SVE). - -The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) - (aarch64_frame::hard_fp_save_and_probe): New fields. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. - Rather than asserting that a leaf function saves LR, instead assert - that a leaf function saves something. - (aarch64_get_separate_components): Prevent the chosen probe - registers from being individually shrink-wrapped. - (aarch64_allocate_and_probe_stack_space): Remove workaround for - probe registers that aren't at the bottom of the previous allocation. - -gcc/testsuite/ - * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. ---- - gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++---- - gcc/config/aarch64/aarch64.h | 8 +++ - .../aarch64/sve/pcs/stack_clash_3.c | 6 +- - 3 files changed, 64 insertions(+), 18 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 8abf3d7a1e2b..a8d907df8843 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void) - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offsetregno = SLOT_REQUIRED; - -- /* With stack-clash, LR must be saved in non-leaf functions. 
The saving of -- LR counts as an implicit probe which allows us to maintain the invariant -- described in the comment at expand_prologue. */ -- gcc_assert (crtl->is_leaf -- || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - - poly_int64 offset = crtl->outgoing_args_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); - frame.bytes_below_saved_regs = offset; -+ frame.sve_save_and_probe = INVALID_REGNUM; - - /* Now assign stack slots for the registers. Start with the predicate - registers, since predicate LDR and STR have a relatively small -@@ -8226,6 +8222,8 @@ aarch64_layout_frame (void) - for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.sve_save_and_probe == INVALID_REGNUM) -+ frame.sve_save_and_probe = regno; - frame.reg_offsetregno = offset; - offset += BYTES_PER_SVE_PRED; - } -@@ -8263,6 +8261,8 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.sve_save_and_probe == INVALID_REGNUM) -+ frame.sve_save_and_probe = regno; - frame.reg_offsetregno = offset; - offset += vector_save_size; - } -@@ -8272,10 +8272,18 @@ aarch64_layout_frame (void) - frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; - bool saves_below_hard_fp_p - = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ gcc_assert (!saves_below_hard_fp_p -+ || (frame.sve_save_and_probe != INVALID_REGNUM -+ && known_eq (frame.reg_offsetframe.sve_save_and_probe, -+ frame.bytes_below_saved_regs))); -+ - frame.bytes_below_hard_fp = offset; -+ frame.hard_fp_save_and_probe = INVALID_REGNUM; - - auto allocate_gpr_slot = &(unsigned int regno) - { -+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -+ frame.hard_fp_save_and_probe = regno; - frame.reg_offsetregno = offset; - if (frame.wb_push_candidate1 == INVALID_REGNUM) - frame.wb_push_candidate1 = regno; -@@ -8309,6 +8317,8 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -+ frame.hard_fp_save_and_probe = regno; - /* If there is an alignment gap between integer and fp callee-saves, - allocate the last fp register to it if possible. */ - if (regno == last_fp_reg -@@ -8332,6 +8342,17 @@ aarch64_layout_frame (void) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; -+ gcc_assert (known_eq (frame.saved_regs_size, -+ frame.below_hard_fp_saved_regs_size) -+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM -+ && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, -+ frame.bytes_below_hard_fp))); -+ -+ /* With stack-clash, a register must be saved in non-leaf functions. -+ The saving of the bottommost register counts as an implicit probe, -+ which allows us to maintain the invariant described in the comment -+ at expand_prologue. */ -+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); - - offset += get_frame_size (); - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8462,6 +8483,25 @@ aarch64_layout_frame (void) - frame.final_adjust = frame.bytes_below_saved_regs; - } - -+ /* The frame is allocated in pieces, with each non-final piece -+ including a register save at offset 0 that acts as a probe for -+ the following piece. 
In addition, the save of the bottommost register -+ acts as a probe for callees and allocas. Roll back any probes that -+ aren't needed. -+ -+ A probe isn't needed if it is associated with the final allocation -+ (including callees and allocas) that happens before the epilogue is -+ executed. */ -+ if (crtl->is_leaf -+ && !cfun->calls_alloca -+ && known_eq (frame.final_adjust, 0)) -+ { -+ if (maybe_ne (frame.sve_callee_adjust, 0)) -+ frame.sve_save_and_probe = INVALID_REGNUM; -+ else -+ frame.hard_fp_save_and_probe = INVALID_REGNUM; -+ } -+ - /* Make sure the individual adjustments add up to the full frame size. */ - gcc_assert (known_eq (frame.initial_adjust - + frame.callee_adjust -@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void) - - poly_int64 offset = frame.reg_offsetregno; - -- /* If the register is saved in the first SVE save slot, we use -- it as a stack probe for -fstack-clash-protection. */ -- if (flag_stack_clash_protection -- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) -- && known_eq (offset, frame.bytes_below_saved_regs)) -- continue; -- - /* Get the offset relative to the register we'll use. */ - if (frame_pointer_needed) - offset -= frame.bytes_below_hard_fp; -@@ -9080,6 +9113,13 @@ aarch64_get_separate_components (void)
View file
_service:tar_scm:0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
Added
@@ -0,0 +1,273 @@ +From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: PATCH 015/157 BackportSME aarch64: Avoid redundancy in + aarch64-cores.def + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c + +The flags fields of the aarch64-cores.def always start with +AARCH64_FL_FOR_<ARCH>. After previous changes, <ARCH> is always +identical to the previous field, so we can drop the explicit +AARCH64_FL_FOR_<ARCH> and derive it programmatically. + +This isn't a big saving in itself, but it helps with later patches. + +gcc/ + * config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH> + from the flags field. + * common/config/aarch64/aarch64-common.cc (all_cores): Add it + here instead. + * config/aarch64/aarch64.cc (all_cores): Likewise. + * config/aarch64/driver-aarch64.cc (all_cores): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 2 +- + 4 files changed, 68 insertions(+), 68 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 6ca89d31f..a965ac660 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -251,7 +251,7 @@ struct arch_to_arch_name + static const struct processor_name_to_arch all_cores = + { + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, ++ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, + #include "config/aarch64/aarch64-cores.def" + {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index f4c2f4ea4..008b0b8c1 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) ++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, 
AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
View file
_service:tar_scm:0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch
Added
@@ -0,0 +1,43 @@
+From 167a3f34b308d3d56e816559701c3fb1c4f88c7b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 25 Oct 2024 03:30:35 +0000
+Subject: [PATCH 114/188] LoongArch: Remove constraint z from movsi_internal
+
+We don't allow SImode in FCC, so constraint z is never really used
+here.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (movsi_internal): Remove
+	constraint z.
+---
+ gcc/config/loongarch/loongarch.md | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 4c7e28ace..23d8dc126 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2197,8 +2197,8 @@
+ })
+ 
+ (define_insn_and_split "*movsi_internal"
+-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z")
+-	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
++  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,f,*r,*m")
++	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))]
+   "(register_operand (operands[0], SImode)
+     || reg_or_0_operand (operands[1], SImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+@@ -2211,7 +2211,7 @@
+   DONE;
+ }
+   "
+-  [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
++  [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
+    (set_attr "mode" "SI")])
+ 
+ ;; 16-bit Integer moves
+-- 
+2.43.0
+
View file
_service:tar_scm:0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
Deleted
@@ -1,157 +0,0 @@ -From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:56 +0100 -Subject: PATCH aarch64: Remove below_hard_fp_saved_regs_size - -After previous patches, it's no longer necessary to store -saved_regs_size and below_hard_fp_saved_regs_size in the frame info. -All measurements instead use the top or bottom of the frame as -reference points. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) - (aarch64_frame::below_hard_fp_saved_regs_size): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. ---- - gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 7 ------ - 2 files changed, 21 insertions(+), 31 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index a8d907df8843..ac3d3b336a37 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -- bool saves_below_hard_fp_p -- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); - gcc_assert (!saves_below_hard_fp_p - || (frame.sve_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offsetframe.sve_save_and_probe, -@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset - frame.bytes_below_saved_regs; -- gcc_assert (known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size) -+ auto saved_regs_size = offset - frame.bytes_below_saved_regs; -+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) - || (frame.hard_fp_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, - frame.bytes_below_hard_fp))); -@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void) - The saving of the bottommost register counts as an implicit probe, - which allows us to maintain the invariant described in the comment - at expand_prologue. 
*/ -- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); -+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - - offset += get_frame_size (); - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; -- if (known_eq (frame.saved_regs_size, 0)) -+ if (known_eq (saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void) - frame.callee_adjust = const_size; - } - else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) -- && frame.saved_regs_size.is_constant (&const_saved_regs_size) -+ && saved_regs_size.is_constant (&const_saved_regs_size) - && const_below_saved_regs + const_saved_regs_size < 512 - /* We could handle this case even with data below the saved - registers, provided that that data left us with valid offsets -@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void) - frame.initial_adjust = frame.frame_size; - } - else if (saves_below_hard_fp_p -- && known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size)) -+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) - { - /* Frame in which all saves are SVE saves: - -@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_above_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else -@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.bytes_above_hard_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - -@@ -9621,17 +9618,17 @@ aarch64_epilogue_uses (int regno) - | local variables | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | \ -- +-------------------------------+ | -- | callee-saved registers | | frame.saved_regs_size -- +-------------------------------+ | -- | LR' | | -- +-------------------------------+ | -- | FP' | | -- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) -- | SVE vector registers | | \ -- +-------------------------------+ | | below_hard_fp_saved_regs_size -- | SVE predicate registers | / / -+ | padding | -+ +-------------------------------+ -+ | callee-saved registers | -+ +-------------------------------+ -+ | LR' | -+ +-------------------------------+ -+ | FP' | -+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) -+ | SVE vector registers | -+ +-------------------------------+ -+ | SVE predicate registers | - +-------------------------------+ - | dynamic allocation | - +-------------------------------+ -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 46d4693e2064..01f7751bc783 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - HOST_WIDE_INT saved_varargs_size; - -- /* The size of the callee-save registers with a slot in REG_OFFSET. 
*/ -- poly_int64 saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the bottom of the register save area. - This value is always a multiple of STACK_BOUNDARY. */ - poly_int64 bytes_below_saved_regs; - -- /* The size of the callee-save registers with a slot in REG_OFFSET that -- are saved below the hard frame pointer. */ -- poly_int64 below_hard_fp_saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the hard frame pointer. This value is - always a multiple of STACK_BOUNDARY. */ --- -2.43.5 -
View file
_service:tar_scm:0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
Added
@@ -0,0 +1,83 @@
+From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:53 +0100
+Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4
+ [PR107025]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957
+
+AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated
+entry in aarch64-option-extensions.def. This means that, although
+it is internally separated from AARCH64_FL_V8_4A, there is no
+mechanism for turning it on and off individually, independently
+of armv8.4-a.
+
+The only place that the flag was used independently was in the
+entry for thunderx3t110, which enabled it alongside V8_3A.
+As noted in PR107025, this means that any use of the extension
+will fail to assemble.
+
+In the PR trail, Andrew suggested removing the core entry.
+That might be best long-term, but since the barrier for removing
+command-line options without a deprecation period is very high,
+this patch instead just drops the flag from the core entry.
+We'll still produce correct code.
+
+gcc/
+	PR target/107025
+	* config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete.
+	(AARCH64_FL_FOR_V8_4A): Update accordingly.
+	(AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly.
+	* config/aarch64/aarch64-cores.def (thunderx3t110): Remove
+	AARCH64_FL_RCPC8_4.
+---
+ gcc/config/aarch64/aarch64-cores.def | 2 +-
+ gcc/config/aarch64/aarch64.h         | 5 ++---
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index 008b0b8c1..cf500d0a9 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F
+ /* ARMv8.3-A Architecture Processors. */
+
+ /* Marvell cores (TX3). */
+-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a)
+
+ /* ARMv8.4-A Architecture Processors. */
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 918a14193..f4e0cd148 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -173,7 +173,6 @@
+ #define AARCH64_FL_SM4	      (1 << 17)  /* Has ARMv8.4-A SM3 and SM4. */
+ #define AARCH64_FL_SHA3	      (1 << 18)  /* Has ARMv8.4-a SHA3 and SHA512. */
+ #define AARCH64_FL_F16FML     (1 << 19)  /* Has ARMv8.4-a FP16 extensions. */
+-#define AARCH64_FL_RCPC8_4    (1 << 20)  /* Has ARMv8.4-a RCPC extensions. */
+
+ /* Statistical Profiling extensions. */
+ #define AARCH64_FL_PROFILE    (1 << 21)
+@@ -265,7 +264,7 @@
+   (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
+ #define AARCH64_FL_FOR_V8_4A \
+   (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
+-   | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
++   | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM)
+ #define AARCH64_FL_FOR_V8_5A \
+   (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
+    | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
+@@ -313,7 +312,7 @@
+ #define AARCH64_ISA_SM4	   (aarch64_isa_flags & AARCH64_FL_SM4)
+ #define AARCH64_ISA_SHA3	   (aarch64_isa_flags & AARCH64_FL_SHA3)
+ #define AARCH64_ISA_F16FML	   (aarch64_isa_flags & AARCH64_FL_F16FML)
+-#define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
++#define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_V8_4A)
+ #define AARCH64_ISA_RNG	   (aarch64_isa_flags & AARCH64_FL_RNG)
+ #define AARCH64_ISA_V8_5A	   (aarch64_isa_flags & AARCH64_FL_V8_5A)
+ #define AARCH64_ISA_TME	   (aarch64_isa_flags & AARCH64_FL_TME)
+-- 
+2.33.0
+
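For illustration only, a standalone sketch of what the last hunk above does: the RCPC8_4 query now simply tests the architecture bit, so armv8.4-a implies the feature. Flag values here are invented, not GCC's real bit assignments.

    #include <cstdint>
    #include <cassert>

    // Hypothetical flag bits standing in for aarch64_isa_flags; the real
    // values live in gcc/config/aarch64/aarch64.h.
    constexpr uint64_t FL_V8_4A  = 1ull << 7;
    constexpr uint64_t FL_F16FML = 1ull << 19;

    int main ()
    {
      // Before the patch RCPC8_4 had its own bit; afterwards any query
      // for it tests the architecture bit instead.
      uint64_t isa_flags = FL_V8_4A | FL_F16FML;
      bool isa_rcpc8_4 = isa_flags & FL_V8_4A;  // was: isa_flags & FL_RCPC8_4
      assert (isa_rcpc8_4);
      return 0;
    }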
View file
_service:tar_scm:0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch
Added
@@ -0,0 +1,47 @@ +From 0929961b9dd57e0dd18e4cccc6ba760706e74f77 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 17 Jan 2024 09:24:06 +0800 +Subject: PATCH 115/188 LoongArch: doc: Add attribute descriptions defined in + the target-supports.exp. + +gcc/ChangeLog: + + * doc/sourcebuild.texi: Add attributes for keywords. +--- + gcc/doc/sourcebuild.texi | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 71c04841d..a1ab0a1cb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2292,6 +2292,26 @@ AArch64 target that is able to generate and execute armv8.3-a FJCVTZS + instruction. + @end table + ++@subsubsection LoongArch specific attributes ++ ++@table @code ++@item loongarch_sx ++LoongArch target that generates instructions for SX. ++ ++@item loongarch_asx ++LoongArch target that generates instructions for ASX. ++ ++@item loongarch_sx_hw ++LoongArch target that is able to generate and execute SX code. ++ ++@item loongarch_asx_hw ++LoongArch target that is able to generate and execute ASX code. ++ ++@item loongarch_call36_support ++LoongArch binutils supports call36 relocation. ++ ++@end table ++ + @subsubsection MIPS-specific attributes + + @table @code +-- +2.43.0 +
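A hypothetical testsuite file showing how these documented keywords are consumed; the dg-require-effective-target directive is standard DejaGnu usage, while the file and test body are invented for illustration.

    /* { dg-do run } */
    /* { dg-require-effective-target loongarch_asx_hw } */
    /* The test only runs where the toolchain can generate ASX code and
       the executing hardware actually supports it.  */
    int main (void) { return 0; }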
View file
_service:tar_scm:0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
Deleted
@@ -1,298 +0,0 @@ -From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:57 +0100 -Subject: PATCH aarch64: Make stack smash canary protect saved registers - -AArch64 normally puts the saved registers near the bottom of the frame, -immediately above any dynamic allocations. But this means that a -stack-smash attack on those dynamic allocations could overwrite the -saved registers without needing to reach as far as the stack smash -canary. - -The same thing could also happen for variable-sized arguments that are -passed by value, since those are allocated before a call and popped on -return. - -This patch avoids that by putting the locals (and thus the canary) below -the saved registers when stack smash protection is active. - -The patch fixes CVE-2023-4039. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): - New function. - (aarch64_layout_frame): Use it to decide whether locals should - go above or below the saved registers. - (aarch64_expand_prologue): Update stack layout comment. - Emit a stack tie after the final adjustment. - -gcc/testsuite/ - * gcc.target/aarch64/stack-protector-8.c: New test. - * gcc.target/aarch64/stack-protector-9.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 46 +++++++-- - .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ - .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ - 3 files changed, 168 insertions(+), 6 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ac3d3b336a37..96c3f48fdc49 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void) - return aarch64_use_frame_pointer; - } - -+/* Return true if the current function should save registers above -+ the locals area, rather than below it. */ -+ -+static bool -+aarch64_save_regs_above_locals_p () -+{ -+ /* When using stack smash protection, make sure that the canary slot -+ comes between the locals and the saved registers. Otherwise, -+ it would be possible for a carefully sized smash attack to change -+ the saved registers (particularly LR and FP) without reaching the -+ canary. */ -+ return crtl->stack_protect_guard; -+} -+ - /* Mark the registers that need to be saved by the callee and calculate - the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). 
*/ -@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void) - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); - bool frame_related_fp_reg_p = false; - aarch64_frame &frame = cfun->machine->frame; -+ poly_int64 top_of_locals = -1; - - frame.emit_frame_chain = aarch64_needs_frame_chain (); - -@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void) - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offsetregno = SLOT_REQUIRED; - -+ bool regs_at_top_p = aarch64_save_regs_above_locals_p (); - - poly_int64 offset = crtl->outgoing_args_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ if (regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - frame.bytes_below_saved_regs = offset; - frame.sve_save_and_probe = INVALID_REGNUM; - -@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void) - at expand_prologue. */ - gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - -- offset += get_frame_size (); -- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -- auto top_of_locals = offset; -- -+ if (!regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - offset += frame.saved_varargs_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); - frame.frame_size = offset; - - frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ gcc_assert (known_ge (top_of_locals, 0)); - frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; -@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno) - | for register varargs | - | | - +-------------------------------+ -- | local variables | <-- frame_pointer_rtx -+ | local variables (1) | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | -+ | padding (1) | - +-------------------------------+ - | callee-saved registers | - +-------------------------------+ -@@ -9630,6 +9655,10 @@ aarch64_epilogue_uses (int regno) - +-------------------------------+ - | SVE predicate registers | - +-------------------------------+ -+ | local variables (2) | -+ +-------------------------------+ -+ | padding (2) | -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ - | padding | -@@ -9639,6 +9668,9 @@ aarch64_epilogue_uses (int regno) - +-------------------------------+ - | | <-- stack_pointer_rtx (aligned) - -+ The regions marked (1) and (2) are mutually exclusive. (2) is used -+ when aarch64_save_regs_above_locals_p is true. -+ - Dynamic stack allocations via alloca() decrease stack_pointer_rtx - but leave frame_pointer_rtx and hard_frame_pointer_rtx - unchanged. -@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void) - gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); -+ if (emit_frame_chain && maybe_ne (final_adjust, 0)) -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - - /* Return TRUE if we can use a simple_return insn. 
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -new file mode 100644 -index 000000000000..e71d820e3654 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -@@ -0,0 +1,95 @@ -+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void g(void *); -+__SVBool_t *h(void *); -+ -+/* -+** test1: -+** sub sp, sp, #288 -+** stp x29, x30, \sp, #?272\ -+** add x29, sp, #?272 -+** mrs (x0-9+), tpidr2_el0 -+** ldr (x0-9+), \\1, #?16\ -+** str \2, \sp, #?264\ -+** mov \2, #?0 -+** add x0, sp, #?8 -+** bl g -+** ... -+** mrs .* -+** ... -+** bne .* -+** ... -+** ldp x29, x30, \sp, #?272\ -+** add sp, sp, #?288 -+** ret -+** bl __stack_chk_fail -+*/ -+int test1() { -+ int y0x40; -+ g(y); -+ return 1; -+} -+ -+/* -+** test2: -+** stp x29, x30, \sp, #?-16\! -+** mov x29, sp -+** sub sp, sp, #1040
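A small standalone sketch of the scenario the commit message describes, with invented function names; compiling with -fstack-protector-strong produces the canary check the patch is about.

    // With the old layout, an overflow of `buf` could reach the saved
    // FP/LR before it reached the stack-protector canary, so the smash
    // went undetected.  With locals (and hence the canary) placed below
    // the saved registers, the overwrite corrupts the canary first and
    // __stack_chk_fail fires.
    static void g (char *p)
    {
      p[0] = 1;        // imagine an out-of-bounds write past buf here
    }

    int f ()
    {
      char buf[64];    // local array adjacent to the register save area
      g (buf);
      return buf[0];   // canary checked in f's epilogue
    }

    int main () { return f (); }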
View file
_service:tar_scm:0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
Added
@@ -0,0 +1,154 @@ +From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: PATCH 017/157 BackportSME aarch64: Fix transitive closure of + features + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1 + +aarch64-option-extensions.def requires us to maintain the transitive +closure of options by hand. This patch fixes a few cases where a +flag was missed. + ++noaes and +nosha2 now disable +crypto, which IMO makes more +sense and is consistent with the Clang behaviour. + +gcc/ + * config/aarch64/aarch64-option-extensions.def (dotprod): Depend + on fp as well as simd. + (sha3): Likewise. + (aes): Likewise. Make +noaes disable crypto. + (sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and + sve2-sha3. + (sve2-sha3): Depend on sha2 as well as sha3. + +gcc/testsuite/ + * gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to + disable crypto but keep aes. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 16 ++++--- + .../gcc.target/aarch64/options_set_6.c | 5 +- + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++ + 3 files changed, 58 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b4d0ac8b6..b98008127 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ + + /* Enabling "dotprod" also enables "simd". + Disabling "dotprod" only disables "dotprod". */ +-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ ++AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ + false, "asimddp") + + /* Enabling "aes" also enables "simd". + Disabling "aes" disables "aes" and "sve2-aes'. */ +-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ +- AARCH64_FL_SVE2_AES, false, "aes") ++AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") + + /* Enabling "sha2" also enables "simd". + Disabling "sha2" just disables "sha2". */ +-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ +- "sha1 sha2") ++AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + + /* Enabling "sha3" enables "simd" and "sha2". + Disabling "sha3" disables "sha3" and "sve2-sha3". */ +-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ ++AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ + AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ + "sha3 sha512") + + /* Enabling "sm4" also enables "simd". + Disabling "sm4" disables "sm4" and "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ ++AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ + AARCH64_FL_SVE2_SM4, false, "sm3 sm4") + + /* Enabling "fp16fml" also enables "fp" and "fp16". +@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ + /* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". 
*/ + AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SHA2 | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") + +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +index 90a055928..2a1d7fe5b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +@@ -6,7 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ + +-/* Group as a whole was requested to be turned on, crypto itself is a bit and so +- just turning off one feature can't turn it off. */ ++/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +new file mode 100644 +index 000000000..0e6461fa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -0,0 +1,47 @@ ++#pragma GCC target "+nothing+dotprod" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+aes" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha2" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha3" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sm4" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+noaes" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+nosha2" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sve2-sha3" ++#ifndef __ARM_FEATURE_SHA2 ++#error Foo ++#endif ++ ++#pragma GCC target "+sve2-sha3+nosha2" ++#ifdef __ARM_FEATURE_SHA3 ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_SVE2_SHA3 ++#error Foo ++#endif +-- +2.33.0 +
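A minimal sketch of the invariant being repaired, the transitive closure of feature dependencies, with invented feature bits and an explicit fixpoint loop; the .def file encodes the same closure by hand, which is why entries like these can drift out of sync.

    #include <cstdint>
    #include <cassert>

    constexpr uint64_t FP = 1, SIMD = 2, SHA2 = 4, SHA3 = 8, SVE2_SHA3 = 16;

    struct dep { uint64_t feature, needs; };
    constexpr dep deps[] = {
      { SIMD, FP }, { SHA2, SIMD }, { SHA3, SHA2 | SIMD },
      { SVE2_SHA3, SHA3 | SHA2 },
    };

    // Repeatedly OR in the dependencies of every enabled feature until
    // the set stops growing.
    uint64_t close_over (uint64_t flags)
    {
      uint64_t prev;
      do {
        prev = flags;
        for (const dep &d : deps)
          if (flags & d.feature)
            flags |= d.needs;
      } while (flags != prev);
      return flags;
    }

    int main ()
    {
      // +sve2-sha3 must pull in sha3, sha2, simd and fp transitively.
      assert (close_over (SVE2_SHA3) == (SVE2_SHA3 | SHA3 | SHA2 | SIMD | FP));
      return 0;
    }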
View file
_service:tar_scm:0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch
Added
@@ -0,0 +1,70 @@ +From c0b63b89a03c11bf6383f0175b60614d73295463 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 22 Jan 2024 18:07:42 +0800 +Subject: PATCH 116/188 LoongArch: Disable explicit reloc for TLS LD/GD with + -mexplicit-relocs=auto + +Binutils 2.42 supports TLS LD/GD relaxation which requires the assembler +macro. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + If la_opt_explicit_relocs is EXPLICIT_RELOCS_AUTO, return false + for SYMBOL_TLS_LDM and SYMBOL_TLS_GD. + (loongarch_call_tls_get_addr): Do not split symbols of + SYMBOL_TLS_LDM or SYMBOL_TLS_GD if la_opt_explicit_relocs is + EXPLICIT_RELOCS_AUTO. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Check + for la.tls.ld and la.tls.gd. +--- + gcc/config/loongarch/loongarch.cc | 10 +++++----- + .../loongarch/explicit-relocs-auto-tls-ld-gd.c | 3 ++- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7da00c132..5f22b9dd8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1967,11 +1967,11 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + { + case SYMBOL_TLS_IE: + case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL64: +- /* The linker don't know how to relax TLS accesses or 64-bit +- pc-relative accesses. */ ++ /* TLS IE cannot be relaxed. TLS LE relaxation is different from ++ the normal R_LARCH_RELAX-based relaxation and it **requires** ++ using the explicit %le_{lo12,hi20,add}_r relocs. The linker ++ does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: + /* The linker don't know how to relax GOT accesses in extreme +@@ -2785,7 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) + { + /* Split tls symbol to high and low. */ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index 957ff98df..ca55fcfc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -6,4 +6,5 @@ extern __thread int b __attribute__((visibility("default"))); + + int test() { return a + b; } + +-/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.ld" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.gd" { target tls_native } } } */ +-- +2.43.0 +
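A minimal C example of the accesses involved; under -fPIC these typically use the local-dynamic and general-dynamic sequences (la.tls.ld / la.tls.gd) that this patch keeps as assembler macros under -mexplicit-relocs=auto, so binutils >= 2.42 can relax them.

    static __thread int local_counter;   // typically local-dynamic in a DSO
    __thread int shared_counter;         // typically general-dynamic when preemptible

    int bump () { return ++local_counter + ++shared_counter; }

    int main () { return bump () == 2 ? 0 : 1; }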
View file
_service:tar_scm:0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
Deleted
@@ -1,66 +0,0 @@
-From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Wed, 20 Sep 2023 11:13:19 +0100
-Subject: [PATCH] aarch64: Fix return register handling in untyped_call
-
-While working on another patch, I hit a problem with the aarch64
-expansion of untyped_call. The expander emits the usual:
-
-  (set (mem ...) (reg resN))
-
-instructions to store the result registers to memory, but it didn't
-say in RTL where those resN results came from. This eventually led
-to a failure of gcc.dg/torture/stackalign/builtin-return-2.c,
-via regrename.
-
-This patch turns the untyped call from a plain call to a call_value,
-to represent that the call returns (or might return) a useful value.
-The patch also uses a PARALLEL return rtx to represent all the possible
-return registers.
-
-gcc/
-	* config/aarch64/aarch64.md (untyped_call): Emit a call_value
-	rather than a call. List each possible destination register
-	in the call pattern.
-
-(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f)
----
- gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++-
- 1 file changed, 19 insertions(+), 1 deletion(-)
-
-diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
-index b9b3ba4f9164..cd568f3daa0f 100644
---- a/gcc/config/aarch64/aarch64.md
-+++ b/gcc/config/aarch64/aarch64.md
-@@ -1159,9 +1159,27 @@
- {
-   int i;
-
-+  /* Generate a PARALLEL that contains all of the register results.
-+     The offsets are somewhat arbitrary, since we don't know the
-+     actual return type. The main thing we need to avoid is having
-+     overlapping byte ranges, since those might give the impression
-+     that two registers are known to have data in common. */
-+  rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0));
-+  poly_int64 offset = 0;
-+  for (i = 0; i < XVECLEN (operands[2], 0); i++)
-+    {
-+      rtx reg = SET_SRC (XVECEXP (operands[2], 0, i));
-+      gcc_assert (REG_P (reg));
-+      rtx offset_rtx = gen_int_mode (offset, Pmode);
-+      rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx);
-+      RTVEC_ELT (rets, i) = piece;
-+      offset += GET_MODE_SIZE (GET_MODE (reg));
-+    }
-+  rtx ret = gen_rtx_PARALLEL (VOIDmode, rets);
-+
-   /* Untyped calls always use the default ABI. It's only possible to use
-      ABI variants if we know the type of the target function. */
--  emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
-+  emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx));
-
-   for (i = 0; i < XVECLEN (operands[2], 0); i++)
-     {
--- 
-2.43.5
-
View file
_service:tar_scm:0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
Added
@@ -0,0 +1,194 @@ +From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: PATCH 018/157 BackportSME aarch64: Reorder an entry in + aarch64-option-extensions.def + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766 + +aarch64-option-extensions.def was topologically sorted except +for one case: crypto came before its aes and sha2 dependencies. +This patch moves crypto after sha2 instead. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Move crypto + after sha2. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto + to come after +crc. + * gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise. + * gcc.target/aarch64/options_set_2.c: Likewise. + * gcc.target/aarch64/options_set_3.c: Likewise. + * gcc.target/aarch64/options_set_4.c: Likewise. +--- + .../aarch64/aarch64-option-extensions.def | 20 +++++++++---------- + .../aarch64/cpunative/native_cpu_0.c | 2 +- + .../aarch64/cpunative/native_cpu_13.c | 2 +- + .../aarch64/cpunative/native_cpu_16.c | 2 +- + .../aarch64/cpunative/native_cpu_17.c | 2 +- + .../aarch64/cpunative/native_cpu_6.c | 2 +- + .../aarch64/cpunative/native_cpu_7.c | 2 +- + .../gcc.target/aarch64/options_set_2.c | 2 +- + .../gcc.target/aarch64/options_set_3.c | 2 +- + .../gcc.target/aarch64/options_set_4.c | 4 ++-- + 10 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b98008127..df2c8d19b 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ + AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ + false, "asimd") + +-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". +- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", +- "sve2-aes", "sve2-sha3", "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ +- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ +- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ +- "aes pmull sha1 sha2") +- + /* Enabling or disabling "crc" only changes "crc". */ + AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") + +@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ + AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ + AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + ++/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". ++ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", ++ "sve2-aes", "sve2-sha3", "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ ++ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ ++ "aes pmull sha1 sha2") ++ + /* Enabling "sha3" enables "simd" and "sha2". 
+ Disabling "sha3" disables "sha3" and "sve2-sha3". */ + AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +index f155f51ba..8499f87c3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +index b7b3a8e13..551669091 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test one with mixed order of feature bits. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +index a424e7c56..2f963bb23 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +index c269c5fef..c68a697aa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +index da72052e6..7608e8845 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the feature bits for crypto and fp16 are given in + same order as declared in options file. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +index 96ad4c14d..72b14b4f6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the crypto and fp16 options are specified in different + order from what is in the options file. 
*/ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +index 3476febce..f82cb5f78 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check to see if crc and crypto are maintained if crypto specified. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +index 4558339f1..7d350cfa3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if smallest set is maintained when outputting. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +index 15514bfe9..5370e02e1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +@@ -6,7 +6,7 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if individual bits that make up a grouping is specified that only the +- grouping is kept. */ +\ No newline at end of file ++ grouping is kept. */ +-- +2.33.0 +
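A sketch of the property the reordering restores: every entry's dependencies must appear earlier in the .def list. Feature bits and the table are invented; only the ordering check mirrors the real constraint.

    #include <cstdint>
    #include <cassert>

    constexpr uint64_t FP = 1, SIMD = 2, AES = 4, SHA2 = 8, CRYPTO = 16;

    struct ext { uint64_t flag, deps; };
    // "crypto" now comes after "aes" and "sha2", its dependencies.
    constexpr ext table[] = {
      { FP, 0 }, { SIMD, FP }, { AES, SIMD }, { SHA2, SIMD },
      { CRYPTO, FP | SIMD | AES | SHA2 },
    };

    int main ()
    {
      uint64_t seen = 0;
      for (const ext &e : table)
        {
          assert ((e.deps & ~seen) == 0);  // all deps defined earlier
          seen |= e.flag;
        }
      return 0;
    }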
View file
_service:tar_scm:0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch
Added
@@ -0,0 +1,35 @@ +From 7e10f7b95a598e9471bd1bc77454af43a69eb506 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 23 Jan 2024 19:32:38 +0800 +Subject: PATCH 117/188 LoongArch: testsuite: Disable stack protector for + got-load.C + +When building GCC with --enable-default-ssp, the stack protector is +enabled for got-load.C, causing additional GOT loads for +__stack_chk_guard. So mem/u will be matched more than 2 times and the +test will fail. + +Disable stack protector to fix this issue. + +gcc/testsuite: + + * g++.target/loongarch/got-load.C (dg-options): Add + -fno-stack-protector. +--- + gcc/testsuite/g++.target/loongarch/got-load.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C +index 20924c739..17870176a 100644 +--- a/gcc/testsuite/g++.target/loongarch/got-load.C ++++ b/gcc/testsuite/g++.target/loongarch/got-load.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */ ++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand -fno-stack-protector" } */ + /* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */ + + #include <bits/stdc++.h> +-- +2.43.0 +
View file
_service:tar_scm:0117-aarch64-Fix-loose-ldpstp-check.patch
Deleted
@@ -1,119 +0,0 @@ -From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Wed, 20 Sep 2023 11:13:20 +0100 -Subject: PATCH aarch64: Fix loose ldpstp check PR111411 - -aarch64_operands_ok_for_ldpstp contained the code: - - /* One of the memory accesses must be a mempair operand. - If it is not the first one, they need to be swapped by the - peephole. */ - if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) - && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) - return false; - -But the requirement isn't just that one of the accesses must be a -valid mempair operand. It's that the lower access must be, since -that's the access that will be used for the instruction operand. - -gcc/ - PR target/111411 - * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require - the lower memory access to a mem-pair operand. - -gcc/testsuite/ - PR target/111411 - * gcc.dg/rtl/aarch64/pr111411.c: New test. - -(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) ---- - gcc/config/aarch64/aarch64.cc | 8 ++- - gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ - 2 files changed, 60 insertions(+), 5 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 96c3f48fdc49..a979accd90a9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), - GET_MODE_SIZE (GET_MODE (mem_2)))); - -- /* One of the memory accesses must be a mempair operand. -- If it is not the first one, they need to be swapped by the -- peephole. */ -- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) -- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) -+ /* The lower memory access must be a mem-pair operand. */ -+ rtx lower_mem = reversed ? 
mem_2 : mem_1; -+ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) - return false; - - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) -diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -new file mode 100644 -index 000000000000..ad07e9c6c893 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -@@ -0,0 +1,57 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-require-effective-target lp64 } */ -+/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ -+ -+extern int data; -+ -+void __RTL (startwith ("ira")) foo (void *ptr) -+{ -+ (function "foo" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} -+ -+void __RTL (startwith ("ira")) bar (void *ptr) -+{ -+ (function "bar" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} --- -2.43.5 -
View file
_service:tar_scm:0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
Added
@@ -0,0 +1,1176 @@ +From deb18d5083d8f9edbdafac184c010a6720dc8dda Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: PATCH 019/157 BackportSME aarch64: Simplify feature definitions + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11a113d501ff64fa4843e28d0a21b3f4e9d0d3de + +Currently the aarch64-option-extensions.def entries, the +aarch64-cores.def entries, and the AARCH64_FL_FOR_* macros +have a transitive closure of dependencies that is maintained by hand. +This is a bit error-prone and is becoming less tenable as more features +are added. The main point of this patch is to maintain the closure +automatically instead. + +For example, the +sve2-aes extension requires sve2 and aes. +This is now described using: + + AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), ...) + +If life was simple, we could just give the name of the feature +and the list of features that it requires/depends on. But sadly +things are more complicated. For example: + +- the legacy +crypto option enables aes and sha2 only, but +nocrypto + disables all crypto-related extensions, including sm4. + +- +fp16fml enables fp16, but armv8.4-a enables fp16fml without fp16. + fp16fml only has an effect when fp16 is also present; see the + comments for more details. + +- +bf16 enables simd, but +bf16+nosimd is valid and enables just the + scalar bf16 instructions. rdma behaves similarly. + +To handle cases like these, the option entries have extra fields to +specify what an explicit +foo enables and what an explicit +nofoo +disables, in addition to the absolute dependencies. + +The other main changes are: + +- AARCH64_FL_* are now defined automatically. + +- the feature list for each architecture level moves from aarch64.h + to aarch64-arches.def. + +As a consequence, we now have a (redundant) V8A feature flag. + +While there, the patch uses a new typedef, aarch64_feature_flags, +for the set of feature flags. This should make it easier to switch +to a class if we run out of bits in the uint64_t. + +For now the patch hardcodes the fact that crypto is the only +synthetic option. A later patch will remove this field. + +To test for things that might not be covered by the testsuite, +I made the driver print out the all_extensions, all_cores and +all_archs arrays before and after the patch, with the following +tweaks: + +- renumber the old AARCH64_FL_* bit assignments to match the .def order +- remove the new V8A flag when printing the new tables +- treat CRYPTO and CRYPTO | AES | SHA2 the same way when printing the + core tables + +(On the last point: some cores enabled just CRYPTO while others enabled +CRYPTO, AES and SHA2. This doesn't cause a difference in behaviour +because of how the dependent macros are defined. With the new scheme, +all entries with CRYPTO automatically get AES and SHA2 too.) + +The only difference is that +nofp now turns off dotprod. This was +another instance of an incomplete transitive closure, but unlike the +instances fixed in a previous patch, it had no observable effect. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Switch to a new format. + * config/aarch64/aarch64-cores.def: Use the same format to specify + lists of features. + * config/aarch64/aarch64-arches.def: Likewise, moving that information + from aarch64.h. + * config/aarch64/aarch64-opts.h (aarch64_feature_flags): New typedef. + * config/aarch64/aarch64.h (aarch64_feature): New class enum. 
+ Turn AARCH64_FL_* macros into constexprs, getting the definitions + from aarch64-option-extensions.def. Remove AARCH64_FL_FOR_* macros. + * common/config/aarch64/aarch64-common.cc: Include + aarch64-feature-deps.h. + (all_extensions): Update for new .def format. + (all_extensions_by_on, all_cores, all_architectures): Likewise. + * config/aarch64/driver-aarch64.cc: Include aarch64-feature-deps.h. + (aarch64_extensions): Update for new .def format. + (aarch64_cpu_data, aarch64_arches): Likewise. + * config/aarch64/aarch64.cc: Include aarch64-feature-deps.h. + (all_architectures, all_cores): Update for new .def format. + * config/aarch64/aarch64-sve-builtins.cc + (check_required_extensions): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 29 +- + gcc/config/aarch64/aarch64-arches.def | 28 +- + gcc/config/aarch64/aarch64-cores.def | 130 +++---- + gcc/config/aarch64/aarch64-feature-deps.h | 121 +++++++ + .../aarch64/aarch64-option-extensions.def | 323 +++++++----------- + gcc/config/aarch64/aarch64-opts.h | 4 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 5 +- + gcc/config/aarch64/aarch64.cc | 14 +- + gcc/config/aarch64/aarch64.h | 164 ++------- + gcc/config/aarch64/driver-aarch64.cc | 10 +- + 10 files changed, 374 insertions(+), 454 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-feature-deps.h + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index a965ac660..74729bb30 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -30,6 +30,7 @@ + #include "opts.h" + #include "flags.h" + #include "diagnostic.h" ++#include "config/aarch64/aarch64-feature-deps.h" + + #ifdef TARGET_BIG_ENDIAN_DEFAULT + #undef TARGET_DEFAULT_TARGET_FLAGS +@@ -214,9 +215,12 @@ struct aarch64_option_extension + /* ISA extensions in AArch64. */ + static const struct aarch64_option_extension all_extensions = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -225,9 +229,12 @@ static const struct aarch64_option_extension all_extensions = + bits and extension turned on. Cached for efficiency. */ + static struct aarch64_option_extension all_extensions_by_on = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -250,18 +257,18 @@ struct arch_to_arch_name + the default set of architectural feature flags they support. 
*/ + static const struct processor_name_to_arch all_cores = + { +-#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, ++#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \ ++ {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, ++ {"generic", AARCH64_ARCH_V8A, feature_deps::V8A ().enable}, + {"", aarch64_no_arch, 0} + }; + + /* Map architecture revisions to their string representation. */ + static const struct arch_to_arch_name all_architectures = + { +-#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH, FLAGS) \ +- {AARCH64_ARCH_##ARCH_IDENT, NAME, FLAGS}, ++#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \ ++ {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable}, + #include "config/aarch64/aarch64-arches.def" + {aarch64_no_arch, "", 0} + }; +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index ece96e22a..9f8246618 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + NAME should be kept as the first argument. */ + +-AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA))
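A rough standalone sketch of the constexpr-closure idea this patch introduces; the names, bits, and template shape are invented simplifications of what gcc/config/aarch64/aarch64-feature-deps.h actually does.

    #include <cstdint>

    constexpr uint64_t FL_FP = 1, FL_SIMD = 2, FL_SVE = 4, FL_SVE2 = 8;

    // Each feature exposes a compile-time `enable` mask equal to its own
    // bit plus the closure of its dependencies, so the transitive closure
    // no longer has to be maintained by hand.
    template<uint64_t Flag, uint64_t... DepEnables>
    struct feature
    {
      static constexpr uint64_t enable = (Flag | ... | DepEnables);
    };

    using FP   = feature<FL_FP>;
    using SIMD = feature<FL_SIMD, FP::enable>;
    using SVE  = feature<FL_SVE, SIMD::enable>;
    using SVE2 = feature<FL_SVE2, SVE::enable>;

    static_assert (SVE2::enable == (FL_SVE2 | FL_SVE | FL_SIMD | FL_FP),
                   "closure computed automatically");

    int main () { return 0; }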
View file
_service:tar_scm:0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch
Added
@@ -0,0 +1,65 @@ +From 8cab312a44efc4711fb7adad65f1314349295591 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 23 Jan 2024 11:28:09 +0800 +Subject: PATCH 118/188 LoongArch: Disable TLS type symbols from generating + non-zero offsets. + +TLS gd ld and ie type symbols will generate corresponding GOT entries, +so non-zero offsets cannot be generated. +The address of TLS le type symbol+addend is not implemented in binutils, +so non-zero offset is not generated here for the time being. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For symbols of type tls, non-zero Offset is not generated. +--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5f22b9dd8..9cdd4ed15 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1921,11 +1921,7 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + x = UNSPEC_ADDRESS (x); + } + else if (SYMBOL_REF_P (x) || LABEL_REF_P (x)) +- { +- *symbol_type = loongarch_classify_symbol (x); +- if (*symbol_type == SYMBOL_TLS) +- return true; +- } ++ *symbol_type = loongarch_classify_symbol (x); + else + return false; + +@@ -1936,17 +1932,21 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_TLS_IE: +- case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL: + case SYMBOL_PCREL64: + /* GAS rejects offsets outside the range -2^31, 2^31-1. */ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + ++ /* The following symbol types do not allow non-zero offsets. */ + case SYMBOL_GOT_DISP: ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: + case SYMBOL_TLS: ++ /* From an implementation perspective, tls_le symbols are allowed to ++ have non-zero offsets, but currently binutils has not added support, ++ so the generation of non-zero offsets is prohibited here. */ ++ case SYMBOL_TLS_LE: + return false; + } + gcc_unreachable (); +-- +2.43.0 +
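A minimal example of a TLS access with a non-zero addend, the case this patch stops folding into the relocation.

    // `arr[2]` is the TLS symbol plus 8 bytes.  For GOT-backed models
    // (GD/LD/IE) the addend cannot live in the relocation, and for LE
    // binutils support is missing, so the compiler now materialises the
    // base address first and adds the offset separately.
    __thread int arr[4];

    int get () { return arr[2]; }

    int main () { arr[2] = 42; return get () == 42 ? 0 : 1; }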
View file
_service:tar_scm:0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
Added
@@ -0,0 +1,467 @@ +From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:55 +0100 +Subject: PATCH 020/157 BackportSME aarch64: Simplify generation of .arch + strings + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d + +aarch64-common.cc has two arrays, one maintaining the original +definition order and one sorted by population count. Sorting +by population count was a way of ensuring topological ordering, +taking advantage of the fact that the entries are partially +ordered by the subset relation. However, the sorting is not +needed now that the .def file is forced to have topological +order from the outset. + +Other changes are: + +(1) The population count used: + + uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; + uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; + int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); + int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); + + where I think the & was supposed to be |. This meant that the + counts would always be 1 in practice, since flag_canonical is + a single bit. This led us to printing +nofp+nosimd even though + GCC "knows" (and GAS agrees) that +nofp disables simd. + +(2) The .arch output code converts +aes+sha2 to +crypto. I think + the main reason for doing this is to support assemblers that + predate the individual per-feature crypto flags. It therefore + seems more natural to treat it as a special case, rather than + as an instance of a general pattern. Hopefully we won't do + something similar in future! + + (There is already special handling of CRC, for different reasons.) + +(3) Previously, if the /proc/cpuinfo code saw a feature like sve, + it would assume the presence of all the features that sve + depends on. It would be possible to keep that behaviour + if necessary, but it was simpler to assume the presence of + fp16 (say) only when fphp is present. There's an argument + that that's more conservatively correct too. + +gcc/ + * common/config/aarch64/aarch64-common.cc + (TARGET_OPTION_INIT_STRUCT): Delete. + (aarch64_option_extension): Remove is_synthetic_flag. + (all_extensions): Update accordingly. + (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete. + (aarch64_option_init_struct, aarch64_contains_opt): Delete. + (aarch64_get_extension_string_for_isa_flags): Rewrite to use + all_extensions instead of all_extensions_on. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve. + * gcc.target/aarch64/cpunative/info_9: Likewise svesm4. + * gcc.target/aarch64/cpunative/info_15: Likewise. + * gcc.target/aarch64/cpunative/info_16: Likewise sve2. + * gcc.target/aarch64/cpunative/info_17: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp + rather than +nofp+nosimd. + * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise. + * gcc.target/aarch64/target_attr_15.c: Likewise. 
+--- + gcc/common/config/aarch64/aarch64-common.cc | 244 ++++-------------- + .../gcc.target/aarch64/cpunative/info_15 | 2 +- + .../gcc.target/aarch64/cpunative/info_16 | 2 +- + .../gcc.target/aarch64/cpunative/info_17 | 2 +- + .../gcc.target/aarch64/cpunative/info_8 | 2 +- + .../gcc.target/aarch64/cpunative/info_9 | 2 +- + .../aarch64/cpunative/native_cpu_10.c | 2 +- + .../aarch64/cpunative/native_cpu_2.c | 2 +- + .../gcc.target/aarch64/target_attr_15.c | 2 +- + 9 files changed, 55 insertions(+), 205 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 74729bb30..057dc094d 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -42,8 +42,6 @@ + + #undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table +-#undef TARGET_OPTION_INIT_STRUCT +-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct + + #define INVALID_IMP ((unsigned) -1) + +@@ -209,7 +207,6 @@ struct aarch64_option_extension + const uint64_t flag_canonical; + const uint64_t flags_on; + const uint64_t flags_off; +- const bool is_synthetic; + }; + + /* ISA extensions in AArch64. */ +@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions = + {NAME, AARCH64_FL_##IDENT, \ + feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ + feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, ++ & ~AARCH64_FL_##IDENT}, + #include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} +-}; +- +-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of +- bits and extension turned on. Cached for efficiency. */ +-static struct aarch64_option_extension all_extensions_by_on = +-{ +-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ +- {NAME, AARCH64_FL_##IDENT, \ +- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ +- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, +-#include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} ++ {NULL, 0, 0, 0} + }; + + struct processor_name_to_arch +@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates) + candidates->safe_push (opt->name); + } + +-/* Comparer to sort aarch64's feature extensions by population count. Largest +- first. */ +- +-typedef const struct aarch64_option_extension opt_ext; +- +-int opt_ext_cmp (const void* a, const void* b) +-{ +- opt_ext *opt_a = (opt_ext *)a; +- opt_ext *opt_b = (opt_ext *)b; +- +- /* We consider the total set of bits an options turns on to be the union of +- the singleton set containing the option itself and the set of options it +- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and +- FL_SIMD. As such the set of bits represented by this option is +- {FL_DOTPROD, FL_SIMD}. */ +- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; +- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; +- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); +- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); +- int order = popcnt_b - popcnt_a; +- +- /* If they have the same amount of bits set, give it a more +- deterministic ordering by using the value of the bits themselves. 
*/ +- if (order != 0) +- return order; +- +- if (total_flags_a != total_flags_b) +- return total_flags_a < total_flags_b ? 1 : -1; +- +- return 0; +-} +- +-/* Implement TARGET_OPTION_INIT_STRUCT. */ +- +-static void +-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) +-{ +- /* Sort the extensions based on how many bits they set, order the larger +- counts first. We sort the list because this makes processing the +- feature bits O(n) instead of O(n^2). While n is small, the function +- to calculate the feature strings is called on every options push, +- pop and attribute change (arm_neon headers, lto etc all cause this to +- happen quite frequently). It is a trade-off between time and space and +- so time won. */ +- int n_extensions +- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension); +- qsort (&all_extensions_by_on, n_extensions, +- sizeof (struct aarch64_option_extension), opt_ext_cmp); +-} +- +-/* Checks to see if enough bits from the option OPT are enabled in +- ISA_FLAG_BITS to be able to replace the individual options with the +- canonicalized version of the option. This is done based on two rules: +- +- 1) Synthetic groups, such as +crypto we only care about the bits that are +- turned on. e.g. +aes+sha2 can be replaced with +crypto. +- +- 2) Options that themselves have a bit, such as +rdma, in this case, all the +- feature bits they turn on must be available and the bit for the option +- itself must be. In this case it's effectively a reduction rather than a +- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would +- need +rdma+fp+simd which is reduced down to +rdma. +-*/ +- +-static bool +-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) +-{ +- uint64_t flags_check +- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
View file
_service:tar_scm:0119-LoongArch-Remove-vec_concatz-mode-pattern.patch
Added
@@ -0,0 +1,75 @@ +From e19c5ba24839d7446f1874b0b33bd61e27e36905 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 24 Jan 2024 17:19:13 +0800 +Subject: PATCH 119/188 LoongArch: Remove vec_concatz<mode> pattern. + +It is incorrect to use vld/vori to implement the vec_concatz<mode> because when the LSX +instruction is used to update the value of the vector register, the upper 128 bits of +the vector register will not be zeroed. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (@vec_concatz<mode>): Remove this define_insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): Use vec_concat<mode>. +--- + gcc/config/loongarch/lasx.md | 15 --------------- + gcc/config/loongarch/loongarch.cc | 17 ++++++----------- + 2 files changed, 6 insertions(+), 26 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index fdfd65e4a..a5128c30c 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -582,21 +582,6 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_insn "@vec_concatz<mode>" +- (set (match_operand:LASX 0 "register_operand" "=f") +- (vec_concat:LASX +- (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand") +- (match_operand:<VHMODE256_ALL> 2 "const_0_operand"))) +- "ISA_HAS_LASX" +-{ +- if (MEM_P (operands1)) +- return "vld\t%w0,%1"; +- else +- return "vori.b\t%w0,%w1,0"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "<MODE>")) +- + (define_insn "vec_concat<mode>" + (set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cdd4ed15..9bd931549 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9912,17 +9912,12 @@ loongarch_expand_vector_group_init (rtx target, rtx vals) + gcc_unreachable (); + } + +- if (high == CONST0_RTX (half_mode)) +- emit_insn (gen_vec_concatz (vmode, target, low, high)); +- else +- { +- if (!register_operand (low, half_mode)) +- low = force_reg (half_mode, low); +- if (!register_operand (high, half_mode)) +- high = force_reg (half_mode, high); +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, low, high))); +- } ++ if (!register_operand (low, half_mode)) ++ low = force_reg (half_mode, low); ++ if (!register_operand (high, half_mode)) ++ high = force_reg (half_mode, high); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_VEC_CONCAT (vmode, low, high))); + } + + /* Expand initialization of a vector which has all same elements. */ +-- +2.43.0 +
View file
_service:tar_scm:0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
Added
@@ -0,0 +1,43 @@
+From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static
+ data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0
+
+Just a minor patch to avoid having to construct std::strings
+in static data.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
+	(arch_to_arch_name): Use const char * instead of std::string.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 057dc094d..2bdf51b8b 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions[] =
+ 
+ struct processor_name_to_arch
+ {
+-  const std::string processor_name;
++  const char *const processor_name;
+   const enum aarch64_arch arch;
+   const uint64_t flags;
+ };
+@@ -231,7 +231,7 @@ struct processor_name_to_arch
+ struct arch_to_arch_name
+ {
+   const enum aarch64_arch arch;
+-  const std::string arch_name;
++  const char *const arch_name;
+   const uint64_t flags;
+ };
+ 
+-- 
+2.33.0
+
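A minimal C++ sketch (types and names invented here, not from the patch) of why the change matters: std::string members in a static table force constructors to run at program load, while plain C strings are emitted directly as read-only data.

#include <string>

struct entry_str  { std::string name; int arch; };  // dynamic init at startup
struct entry_cstr { const char *name; int arch; };  // pure static .rodata

static const entry_str  heavy[] = { {"armv8-a", 8}, {"armv8.1-a", 8} };
static const entry_cstr light[] = { {"armv8-a", 8}, {"armv8.1-a", 8} };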
View file
_service:tar_scm:0120-LoongArch-Optimize-implementation-of-single-precisio.patch
Added
@@ -0,0 +1,107 @@
+From cb9180ef1fb7e7b97a60adc3d3908b9684771cd8 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Wed, 24 Jan 2024 17:44:17 +0800
+Subject: [PATCH 120/188] LoongArch: Optimize implementation of
+ single-precision floating-point approximate division.
+
+We found that in the spec17 521.wrf program, some loop invariant code generated
+from single-precision floating-point approximate division calculation failed to
+propose a loop. This is because the pseudo-register that stores the
+intermediate temporary calculation results is rewritten in the implementation
+of single-precision floating-point approximate division, failing to propose
+invariants in the loop2_invariant pass. To this end, the intermediate temporary
+calculation results are stored in new pseudo-registers without destroying the
+read-write dependency, so that they could be recognized as loop invariants in
+the loop2_invariant pass.
+After optimization, the number of instructions of 521.wrf is reduced by 0.18%
+compared with before optimization (1716612948501 -> 1713471771364).
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_emit_swdivsf): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/invariant-recip.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc          | 19 +++++++----
+ .../gcc.target/loongarch/invariant-recip.c | 33 +++++++++++++++++++
+ 2 files changed, 46 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9bd931549..5877b0acf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10842,16 +10842,23 @@ void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
+   /* x0 = 1./b estimate.  */
+   emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+					      unspec)));
+-  /* 2.0 - b * x0  */
++  /* e0 = 2.0 - b * x0.  */
+   emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,
+					   gen_rtx_NEG (mode, b), x0, mtwo)));
+ 
+-  /* x0 = a * x0  */
+   if (a != CONST1_RTX (mode))
+-    emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0)));
+-
+-  /* res = e0 * x0  */
+-  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++    {
++      rtx e1 = gen_reg_rtx (mode);
++      /* e1 = a * x0.  */
++      emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0)));
++      /* res = e0 * e1.  */
++      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1)));
++    }
++  else
++    {
++      /* res = e0 * x0.  */
++      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++    }
+ }
+ 
+ static bool
+diff --git a/gcc/testsuite/gcc.target/loongarch/invariant-recip.c b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+new file mode 100644
+index 000000000..2f64f6ed5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -march=loongarch64 -mabi=lp64d -mrecip -mfrecipe -fdump-rtl-loop2_invariant " } */
++/* { dg-final { scan-rtl-dump "Decided to move dependent invariant" "loop2_invariant" } } */
++
++void
++nislfv_rain_plm (int im, int km, float dzl[im][km], float rql[im][km],
++		 float dt)
++{
++  int i, k;
++  float con1, decfl;
++  float dz[km], qn[km], wi[km + 1];
++
++  for (i = 0; i < im; i++)
++    {
++      for (k = 0; k < km; k++)
++	{
++	  dz[k] = dzl[i][k];
++	}
++      con1 = 0.05;
++      for (k = km - 1; k >= 0; k--)
++	{
++	  decfl = (wi[k + 1] - wi[k]) * dt / dz[k];
++	  if (decfl > con1)
++	    {
++	      wi[k] = wi[k + 1] - con1 * dz[k] / dt;
++	    }
++	}
++      for (k = 0; k < km; k++)
++	{
++	  rql[i][k] = qn[k];
++	}
++    }
++}
+-- 
+2.43.0
+
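A C-level sketch of the transformation this commit message describes (variable names follow the comments in the patch, but the code is illustrative, not the compiler's): before the change the reciprocal estimate register was overwritten, so nothing stayed loop-invariant; afterwards each intermediate result gets its own temporary.

float approx_div_before (float a, float b)
{
  float x0 = 1.0f / b;        /* stands in for the frecipe estimate */
  float e0 = 2.0f - b * x0;   /* Newton-Raphson correction term */
  x0 = a * x0;                /* x0 clobbered: breaks invariance */
  return e0 * x0;
}

float approx_div_after (float a, float b)
{
  float x0 = 1.0f / b;        /* invariant whenever b is */
  float e0 = 2.0f - b * x0;   /* also invariant */
  float e1 = a * x0;          /* fresh temporary, no overwrite */
  return e0 * e1;
}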
View file
_service:tar_scm:0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
Added
@@ -0,0 +1,195 @@
+From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of
+ option-related data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2
+
+Some of the option structures have all-const member variables.
+That doesn't seem necessary: we can just use const on the objects
+that are supposed to be read-only.
+
+Also, with the new, more C++-heavy option handling, it seems
+better to use constexpr for the static data, to make sure that
+we're not adding unexpected overhead.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
+	(processor_name_to_arch, arch_to_arch_name): Remove const from
+	member variables.
+	(all_extensions, all_cores, all_architectures): Make a constexpr.
+	* config/aarch64/aarch64.cc (processor): Remove const from
+	member variables.
+	(all_architectures): Make a constexpr.
+	* config/aarch64/driver-aarch64.cc (aarch64_core_data)
+	(aarch64_arch_driver_info): Remove const from member variables.
+	(aarch64_cpu_data, aarch64_arches): Make a constexpr.
+	(get_arch_from_id): Return a pointer to const.
+	(host_detect_local_cpu): Update accordingly.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++-----------
+ gcc/config/aarch64/aarch64.cc               | 14 +++++------
+ gcc/config/aarch64/driver-aarch64.cc        | 15 ++++++------
+ 3 files changed, 27 insertions(+), 28 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 2bdf51b8b..ac3486d71 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts,
+ /* An ISA extension in the co-processor and main instruction set space.  */
+ struct aarch64_option_extension
+ {
+-  const char *const name;
+-  const uint64_t flag_canonical;
+-  const uint64_t flags_on;
+-  const uint64_t flags_off;
++  const char *name;
++  uint64_t flag_canonical;
++  uint64_t flags_on;
++  uint64_t flags_off;
+ };
+ 
+ /* ISA extensions in AArch64.  */
+-static const struct aarch64_option_extension all_extensions[] =
++static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+   {NAME, AARCH64_FL_##IDENT, \
+@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] =
+ 
+ struct processor_name_to_arch
+ {
+-  const char *const processor_name;
+-  const enum aarch64_arch arch;
+-  const uint64_t flags;
++  const char *processor_name;
++  aarch64_arch arch;
++  uint64_t flags;
+ };
+ 
+ struct arch_to_arch_name
+ {
+-  const enum aarch64_arch arch;
+-  const char *const arch_name;
+-  const uint64_t flags;
++  aarch64_arch arch;
++  const char *arch_name;
++  uint64_t flags;
+ };
+ 
+ /* Map processor names to the architecture revision they implement and
+    the default set of architectural feature flags they support.  */
+-static const struct processor_name_to_arch all_cores[] =
++static constexpr processor_name_to_arch all_cores[] =
+ {
+ #define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
+   {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
+@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] =
+ };
+ 
+ /* Map architecture revisions to their string representation.  */
+-static const struct arch_to_arch_name all_architectures[] =
++static constexpr arch_to_arch_name all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \
+   {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1363873b1..71db7ace1 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions[] =
+ /* A processor implementing AArch64.  */
+ struct processor
+ {
+-  const char *const name;
+-  enum aarch64_processor ident;
+-  enum aarch64_processor sched_core;
+-  enum aarch64_arch arch;
+-  const uint64_t flags;
+-  const struct tune_params *const tune;
++  const char *name;
++  aarch64_processor ident;
++  aarch64_processor sched_core;
++  aarch64_arch arch;
++  uint64_t flags;
++  const tune_params *tune;
+ };
+ 
+ /* Architectures implementing AArch64.  */
+-static const struct processor all_architectures[] =
++static constexpr processor all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
+   {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index ddfc9451f..ee9cb65a5 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -50,7 +50,7 @@ struct aarch64_core_data
+   unsigned char implementer_id;   /* Exactly 8 bits */
+   unsigned int part_no;           /* 12 bits + 12 bits */
+   unsigned variant;
+-  const uint64_t flags;
++  uint64_t flags;
+ };
+ 
+ #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
+@@ -64,7 +64,7 @@ struct aarch64_core_data
+ #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+   { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+ 
+-static struct aarch64_core_data aarch64_cpu_data[] =
++static constexpr aarch64_core_data aarch64_cpu_data[] =
+ {
+ #include "aarch64-cores.def"
+   { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
+@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info
+ {
+   const char* id;
+   const char* name;
+-  const uint64_t flags;
++  uint64_t flags;
+ };
+ 
+ /* Skip the leading "V" in the architecture name.  */
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
+   { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
+ 
+-static struct aarch64_arch_driver_info aarch64_arches[] =
++static constexpr aarch64_arch_driver_info aarch64_arches[] =
+ {
+ #include "aarch64-arches.def"
+   {NULL, NULL, 0}
+@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] =
+ /* Return an aarch64_arch_driver_info for the architecture described
+    by ID, or NULL if ID describes something we don't know about.  */
+ 
+-static struct aarch64_arch_driver_info*
++static const aarch64_arch_driver_info *
+ get_arch_from_id (const char* id)
+ {
+   unsigned int i = 0;
+@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv)
+ 
+   if (aarch64_cpu_data[i].name == NULL)
+     {
+-      aarch64_arch_driver_info* arch_info
+-	= get_arch_from_id (DEFAULT_ARCH);
++      auto arch_info = get_arch_from_id (DEFAULT_ARCH);
+ 
+       gcc_assert (arch_info);
+ 
+@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv)
+   else if (arch)
+     {
+       const char *arch_id = aarch64_cpu_data[i].arch;
+-      aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
++      auto arch_info = get_arch_from_id (arch_id);
+ 
+       /* We got some arch indentifier that's not in aarch64-arches.def?  */
+       if (!arch_info)
+-- 
+2.33.0
+
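A small illustrative C++ sketch (not from the patch) of the const/constexpr distinction the commit message relies on: const permits, but does not force, runtime initialization, whereas constexpr requires the table to be a compile-time constant or the build fails, so no hidden startup cost can creep in.

struct processor { const char *name; int arch; };

static const     processor maybe_runtime[] = { {"cortex-a53", 8} };  // may init at load
static constexpr processor compile_time[]  = { {"cortex-a53", 8} };  // must be static data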
View file
_service:tar_scm:0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch
Added
@@ -0,0 +1,71 @@
+From a2baa4807fdfd381c543eb7ea85edf14dc6c8a20 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:32:31 +0800
+Subject: [PATCH 121/188] LoongArch: Define LOGICAL_OP_NON_SHORT_CIRCUIT
+
+Define LOGICAL_OP_NON_SHORT_CIRCUIT as 0, for a short-circuit branch, use the
+short-circuit operation instead of the non-short-circuit operation.
+
+SPEC2017 performance evaluation shows 1% performance improvement for fprate
+GEOMEAN and no obvious regression for others. Especially, 526.blender_r +10.6%
+on 3A6000.
+
+This modification will introduce the following FAIL items:
+
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Conditional combines static and invariant" 1
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Will duplicate bb" 2
+FAIL: gcc.dg/tree-ssa/update-threading.c scan-tree-dump-times optimized "Invalid sum" 0
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Define.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/short-circuit.c: New test.
+---
+ gcc/config/loongarch/loongarch.h          |  1 +
+ .../gcc.target/loongarch/short-circuit.c  | 19 +++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/short-circuit.c
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index f54b078b1..15261fdc0 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -869,6 +869,7 @@ typedef struct {
+    1 is the default; other values are interpreted relative to that.  */
+ 
+ #define BRANCH_COST(speed_p, predictable_p) la_branch_cost
++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+ 
+ /* Return the asm template for a conditional branch instruction.
+    OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+diff --git a/gcc/testsuite/gcc.target/loongarch/short-circuit.c b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+new file mode 100644
+index 000000000..bed585ee1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -fdump-tree-gimple" } */
++
++int
++short_circuit (float *a)
++{
++  float t1x = a[0];
++  float t2x = a[1];
++  float t1y = a[2];
++  float t2y = a[3];
++  float t1z = a[4];
++  float t2z = a[5];
++
++  if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z)
++    return 0;
++
++  return 1;
++}
++/* { dg-final { scan-tree-dump-times "if" 6 "gimple" } } */
+-- 
+2.43.0
+
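Roughly, the hook chooses between the two shapes below (illustrative C++, not GCC internals): defining LOGICAL_OP_NON_SHORT_CIRCUIT to 0 tells the middle end to keep one conditional branch per clause rather than flattening the condition into branchless bit operations.

int f_short_circuit (int a, int b)
{
  if (a > 0)      /* first test; branches */
    if (b > 0)    /* second test runs only when the first passed */
      return 1;
  return 0;
}

int f_non_short_circuit (int a, int b)
{
  return (a > 0) & (b > 0);   /* both comparisons always evaluated */
}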
View file
_service:tar_scm:0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
Added
@@ -0,0 +1,394 @@ +From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:56 +0100 +Subject: PATCH 023/157 BackportSME aarch64: Make more use of + aarch64_feature_flags + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd + +A previous patch added a aarch64_feature_flags typedef, to abstract +the representation of the feature flags. This patch makes existing +code use the typedef too. Hope I've caught them all! + +gcc/ + * common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags + for feature flags throughout. + * config/aarch64/aarch64-protos.h: Likewise. + * config/aarch64/aarch64-sve-builtins.h: Likewise. + * config/aarch64/aarch64-sve-builtins.cc: Likewise. + * config/aarch64/aarch64.cc: Likewise. + * config/aarch64/aarch64.opt: Likewise. + * config/aarch64/driver-aarch64.cc: Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++------- + gcc/config/aarch64/aarch64-protos.h | 5 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++--------- + gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++--- + gcc/config/aarch64/aarch64.cc | 29 +++++++++++---------- + gcc/config/aarch64/aarch64.opt | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 10 +++---- + 7 files changed, 56 insertions(+), 47 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index ac3486d71..3efa57b26 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts, + struct aarch64_option_extension + { + const char *name; +- uint64_t flag_canonical; +- uint64_t flags_on; +- uint64_t flags_off; ++ aarch64_feature_flags flag_canonical; ++ aarch64_feature_flags flags_on; ++ aarch64_feature_flags flags_off; + }; + + /* ISA extensions in AArch64. */ +@@ -225,14 +225,14 @@ struct processor_name_to_arch + { + const char *processor_name; + aarch64_arch arch; +- uint64_t flags; ++ aarch64_feature_flags flags; + }; + + struct arch_to_arch_name + { + aarch64_arch arch; + const char *arch_name; +- uint64_t flags; ++ aarch64_feature_flags flags; + }; + + /* Map processor names to the architecture revision they implement and +@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures = + a copy of the string is created and stored to INVALID_EXTENSION. */ + + enum aarch64_parse_opt_result +-aarch64_parse_extension (const char *str, uint64_t *isa_flags, ++aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags, + std::string *invalid_extension) + { + /* The extension string is parsed left to right. */ +@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates) + that all the "+" flags come before the "+no" flags. 
*/ + + std::string +-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, +- uint64_t default_arch_flags) ++aarch64_get_extension_string_for_isa_flags ++ (aarch64_feature_flags isa_flags, ++ aarch64_feature_flags default_arch_flags) + { + std::string outstr = ""; + +@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name) + || a_to_an->arch == aarch64_no_arch) + fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); + +- uint64_t extensions = p_to_a->flags; ++ aarch64_feature_flags extensions = p_to_a->flags; + aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); + + std::string outstr = a_to_an->arch_name +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index e60ce3c36..ef84df731 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); + enum aarch64_parse_opt_result aarch64_parse_extension (const char *, +- uint64_t *, ++ aarch64_feature_flags *, + std::string *); + void aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates); +-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); ++std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags, ++ aarch64_feature_flags); + + rtl_opt_pass *make_pass_fma_steering (gcc::context *); + rtl_opt_pass *make_pass_track_speculation (gcc::context *); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index c06e99339..b927a886e 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -82,7 +82,7 @@ public: + + /* The architecture extensions that the function requires, as a set of + AARCH64_FL_* flags. */ +- uint64_t required_extensions; ++ aarch64_feature_flags required_extensions; + + /* True if the decl represents an overloaded function that needs to be + resolved by function_resolver. */ +@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl) + Report an error against LOCATION if not. 
*/ + static bool + check_required_extensions (location_t location, tree fndecl, +- uint64_t required_extensions) ++ aarch64_feature_flags required_extensions) + { +- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; ++ auto missing_extensions = required_extensions & ~aarch64_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + +- static const struct { uint64_t flag; const char *name; } extensions = { ++ static const struct { ++ aarch64_feature_flags flag; ++ const char *name; ++ } extensions = { + #define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \ + { AARCH64_FL_##IDENT, EXT_NAME }, + #include "aarch64-option-extensions.def" +@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance) + registered_function & + function_builder::add_function (const function_instance &instance, + const char *name, tree fntype, tree attrs, +- uint64_t required_extensions, ++ aarch64_feature_flags required_extensions, + bool overloaded_p, + bool placeholder_p) + { +@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance, + one-to-one mapping between "short" and "full" names, and if standard + overload resolution therefore isn't necessary. */ + void +-function_builder::add_unique_function (const function_instance &instance, +- tree return_type, +- vec<tree> &argument_types, +- uint64_t required_extensions, +- bool force_direct_overloads) ++function_builder:: ++add_unique_function (const function_instance &instance, ++ tree return_type, ++ vec<tree> &argument_types, ++ aarch64_feature_flags required_extensions, ++ bool force_direct_overloads) + { + /* Add the function under its full (unique) name. */ + char *name = get_name (instance, false); +@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance, + features are available as part of resolving the function to the + relevant unique function. */ + void +-function_builder::add_overloaded_function (const function_instance &instance, +- uint64_t required_extensions) ++function_builder:: ++add_overloaded_function (const function_instance &instance, ++ aarch64_feature_flags required_extensions) + { + char *name = get_name (instance, true); + if (registered_function **map_value = m_overload_names.get (name)) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 24594d584..63d1db776 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -263,7 +263,7 @@ struct function_group_info + + /* The architecture extensions that the functions require, as a set of + AARCH64_FL_* flags. */ +- uint64_t required_extensions; ++ aarch64_feature_flags required_extensions; + }; + + /* Describes a single fully-resolved function (i.e. one that has a +@@ -321,8 +321,9 @@ public:
View file
_service:tar_scm:0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch
Added
@@ -0,0 +1,84 @@
+From 5cab5d1a9fb9cfaa0d12d229aa0ee19e0dd55cc5 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:23:20 +0800
+Subject: [PATCH 122/188] LoongArch: Split vec_selects of bottom elements into
+ simple move
+
+For below pattern, can be treated as a simple move because floating point
+and vector share a common register on loongarch64.
+
+(set (reg/v:SF 32 $f0 [orig:93 res ] [93])
+     (vec_select:SF (reg:V8SF 32 $f0 [115])
+       (parallel [
+           (const_int 0 [0])
+         ])))
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_extract<mode>_0):
+	New define_insn_and_split patten.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-extract.c: New test.
+---
+ gcc/config/loongarch/lasx.md            | 15 ++++++++++++++
+ .../gcc.target/loongarch/vect-extract.c | 20 +++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-extract.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index a5128c30c..946811e1a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -746,6 +746,21 @@
+   DONE;
+ })
+ 
++(define_insn_and_split "vec_extract<mode>_0"
++  [(set (match_operand:<UNITMODE> 0 "register_operand" "=f")
++	(vec_select:<UNITMODE>
++	  (match_operand:FLASX 1 "register_operand" "f")
++	  (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX"
++  "#"
++  "&& reload_completed"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[1] = gen_rtx_REG (<UNITMODE>mode, REGNO (operands[1]));
++}
++  [(set_attr "move_type" "fmove")
++   (set_attr "mode" "<UNITMODE>")])
++
+ (define_expand "vec_perm<mode>"
+  [(match_operand:LASX 0 "register_operand")
+   (match_operand:LASX 1 "register_operand")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-extract.c b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+new file mode 100644
+index 000000000..ce126e3a4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mlasx -fno-vect-cost-model -fno-unroll-loops" } */
++/* { dg-final { scan-assembler-not "xvpickve.w" } } */
++/* { dg-final { scan-assembler-not "xvpickve.d" } } */
++
++float
++sum_float (float *a, int n) {
++  float res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
++
++double
++sum_double (double *a, int n) {
++  double res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
+-- 
+2.43.0
+
View file
_service:tar_scm:0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
Added
@@ -0,0 +1,70 @@
+From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:56 +0100
+Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of
+ flags_on/off fields
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971
+
+After previous changes, it's more convenient if the flags_on and
+flags_off fields of all_extensions include the feature flag itself.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (all_extensions):
+	Include the feature flag in flags_on and flags_off.
+	(aarch64_parse_extension): Update accordingly.
+	(aarch64_get_extension_string_for_isa_flags): Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 3efa57b26..752ba5632 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -213,10 +213,8 @@ struct aarch64_option_extension
+ static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+-  {NAME, AARCH64_FL_##IDENT, \
+-   feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+-   feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+-   & ~AARCH64_FL_##IDENT},
++  {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
++   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
+ #include "config/aarch64/aarch64-option-extensions.def"
+   {NULL, 0, 0, 0}
+ };
+@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
+ 	    {
+ 	      /* Add or remove the extension.  */
+ 	      if (adding_ext)
+-		*isa_flags |= (opt->flags_on | opt->flag_canonical);
++		*isa_flags |= opt->flags_on;
+ 	      else
+-		*isa_flags &= ~(opt->flags_off | opt->flag_canonical);
++		*isa_flags &= ~opt->flags_off;
+ 	      break;
+ 	    }
+ 	}
+@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags
+ 
+       if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
+ 	{
+-	  current_flags |= opt.flag_canonical | opt.flags_on;
++	  current_flags |= opt.flags_on;
+ 	  added |= opt.flag_canonical;
+ 	}
+     }
+@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags
+   for (auto &opt : all_extensions)
+     if (opt.flag_canonical & current_flags & ~isa_flags)
+       {
+-	current_flags &= ~(opt.flag_canonical | opt.flags_off);
++	current_flags &= ~opt.flags_off;
+ 	outstr += "+no";
+ 	outstr += opt.name;
+       }
+-- 
+2.33.0
+
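A minimal sketch of the simplification (illustrative flag values, not the real AArch64 feature bits): once flags_on and flags_off already carry the extension's own bit, enabling or disabling an extension becomes a single mask operation instead of two.

#include <cstdint>

constexpr uint64_t FL_FP   = uint64_t (1) << 0;
constexpr uint64_t FL_SIMD = uint64_t (1) << 1;   // SIMD depends on FP

struct opt_ext { uint64_t flag_canonical, flags_on, flags_off; };

// flags_on now includes FL_SIMD itself, flags_off includes it too.
constexpr opt_ext simd = { FL_SIMD, FL_SIMD | FL_FP, FL_SIMD };

uint64_t enable_ext (uint64_t isa)  { return isa |  simd.flags_on;  }  // one OR
uint64_t disable_ext (uint64_t isa) { return isa & ~simd.flags_off; }  // one AND-NOT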
View file
_service:tar_scm:0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch
Added
@@ -0,0 +1,112 @@ +From c4815d70715bed71b8e89888ef19eb43e9171229 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 30 Jan 2024 15:02:32 +0800 +Subject: PATCH 123/188 LoongArch: Modify the address calculation logic for + obtaining array element values through fp. + +Modify address calculation logic from (((a x C) + fp) + offset) to ((fp + offset) + a x C). +Thereby modifying the register dependencies and optimizing the code. +The value of C is 2 4 or 8. + +The following is the assembly code before and after a loop modification in spec2006 401.bzip: + + old | new + 735 .L71: | 735 .L71: + 736 slli.d $r12,$r15,2 | 736 slli.d $r12,$r15,2 + 737 ldx.w $r13,$r22,$r12 | 737 ldx.w $r13,$r22,$r12 + 738 addi.d $r15,$r15,-1 | 738 addi.d $r15,$r15,-1 + 739 slli.w $r16,$r15,0 | 739 slli.w $r16,$r15,0 + 740 addi.w $r13,$r13,-1 | 740 addi.w $r13,$r13,-1 + 741 slti $r14,$r13,0 | 741 slti $r14,$r13,0 + 742 add.w $r12,$r26,$r13 | 742 add.w $r12,$r26,$r13 + 743 maskeqz $r12,$r12,$r14 | 743 maskeqz $r12,$r12,$r14 + 744 masknez $r14,$r13,$r14 | 744 masknez $r14,$r13,$r14 + 745 or $r12,$r12,$r14 | 745 or $r12,$r12,$r14 + 746 ldx.bu $r14,$r30,$r12 | 746 ldx.bu $r14,$r30,$r12 + 747 lu12i.w $r13,4096>>12 | 747 alsl.d $r14,$r14,$r18,2 + 748 ori $r13,$r13,432 | 748 ldptr.w $r13,$r14,0 + 749 add.d $r13,$r13,$r3 | 749 addi.w $r17,$r13,-1 + 750 alsl.d $r14,$r14,$r13,2 | 750 stptr.w $r17,$r14,0 + 751 ldptr.w $r13,$r14,-1968 | 751 slli.d $r13,$r13,2 + 752 addi.w $r17,$r13,-1 | 752 stx.w $r12,$r22,$r13 + 753 st.w $r17,$r14,-1968 | 753 ldptr.w $r12,$r19,0 + 754 slli.d $r13,$r13,2 | 754 blt $r12,$r16,.L71 + 755 stx.w $r12,$r22,$r13 | 755 .align 4 + 756 ldptr.w $r12,$r18,-2048 | 756 + 757 blt $r12,$r16,.L71 | 757 + 758 .align 4 | 758 + +This patch is ported from riscv's commit r14-3111. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function. + (loongarch_legitimize_address): Add logical transformation code. +--- + gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5877b0acf..612a9c138 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3215,6 +3215,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + return true; + } + ++/* Helper loongarch_legitimize_address. Given X, return true if it ++ is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. ++ ++ This respectively represent canonical shift-add rtxs or scaled ++ memory addresses. */ ++static bool ++mem_shadd_or_shadd_rtx_p (rtx x) ++{ ++ return ((GET_CODE (x) == ASHIFT ++ || GET_CODE (x) == MULT) ++ && CONST_INT_P (XEXP (x, 1)) ++ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) ++ || (GET_CODE (x) == MULT ++ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); ++} ++ + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can + be legitimized in a way that the generic machinery might not expect, + return a new address, otherwise return NULL. MODE is the mode of +@@ -3238,6 +3254,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + loongarch_split_plus (x, &base, &offset); + if (offset != 0) + { ++ /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. 
*/ ++ if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) ++ && IMM12_OPERAND (offset)) ++ { ++ rtx index = XEXP (base, 0); ++ rtx fp = XEXP (base, 1); ++ ++ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) ++ { ++ /* If we were given a MULT, we must fix the constant ++ as we're going to create the ASHIFT form. */ ++ int shift_val = INTVAL (XEXP (index, 1)); ++ if (GET_CODE (index) == MULT) ++ shift_val = exact_log2 (shift_val); ++ ++ rtx reg1 = gen_reg_rtx (Pmode); ++ rtx reg3 = gen_reg_rtx (Pmode); ++ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); ++ loongarch_emit_binary (PLUS, reg3, ++ gen_rtx_ASHIFT (Pmode, XEXP (index, 0), ++ GEN_INT (shift_val)), ++ reg1); ++ ++ return reg3; ++ } ++ } ++ + if (!loongarch_valid_base_register_p (base, mode, false)) + base = copy_to_mode_reg (Pmode, base); + addr = loongarch_add_offset (NULL, base, offset); +-- +2.43.0 +
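The same reassociation at C level (illustrative sketch, assuming a 4-byte element so the scale C is 4): the rewritten form lets "fp + offset" be computed once and hoisted, instead of re-adding the offset to a varying base on every iteration.

char *addr_before (char *fp, long offset, long a)
{
  return (fp + (a << 2)) + offset;   /* offset re-added to a varying base */
}

char *addr_after (char *fp, long offset, long a)
{
  char *base = fp + offset;   /* loop-invariant: independent of a */
  return base + (a << 2);     /* only the scaled index varies */
}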
View file
_service:tar_scm:0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
Added
@@ -0,0 +1,370 @@ +From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:57 +0100 +Subject: PATCH 025/157 BackportSME aarch64: Tweak handling of + -mgeneral-regs-only + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0 + +-mgeneral-regs-only is effectively "+nofp for the compiler without +changing the assembler's ISA flags". Currently that's implemented +by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on +!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP +registers depend (directly or indirectly) on one of those three TARGET +macros. The problem is that it's easy to forgot to do the last bit. + +This patch instead represents the distinction between "assemnbler +ISA flags" and "compiler ISA flags" more directly, funnelling +all updates through a new function that sets both sets of flags +together. + +gcc/ + * config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable. + * config/aarch64/aarch64.h (aarch64_asm_isa_flags) + (aarch64_isa_flags): Redefine as read-only macros. + (TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on + !TARGET_GENERAL_REGS_ONLY. + * common/config/aarch64/aarch64-common.cc + (aarch64_set_asm_isa_flags): New function. + (aarch64_handle_option): Call it when updating -mgeneral-regs. + * config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace + m_old_isa_flags with m_old_asm_isa_flags. + (aarch64_set_asm_isa_flags): Declare. + * config/aarch64/aarch64-builtins.cc + (aarch64_simd_switcher::aarch64_simd_switcher) + (aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore + aarch64_asm_isa_flags instead of aarch64_isa_flags. + * config/aarch64/aarch64-sve-builtins.cc + (check_required_extensions): Use aarch64_asm_isa_flags instead + of aarch64_isa_flags. + * config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function. + (aarch64_override_options, aarch64_handle_attr_arch) + (aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use + aarch64_set_asm_isa_flags to set the ISA flags. + (aarch64_option_print, aarch64_declare_function_name) + (aarch64_start_file): Use aarch64_asm_isa_flags instead + of aarch64_isa_flags. + (aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as + aarch64_isa_flags. +--- + gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++ + gcc/config/aarch64/aarch64-builtins.cc | 6 +-- + gcc/config/aarch64/aarch64-protos.h | 5 ++- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++------- + gcc/config/aarch64/aarch64.h | 17 ++++++-- + gcc/config/aarch64/aarch64.opt | 3 ++ + 7 files changed, 68 insertions(+), 22 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 752ba5632..c64b4987e 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -137,6 +137,17 @@ reset_tsv110_option () + } + } + ++/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update ++ OPTS->x_aarch64_isa_flags accordingly. */ ++void ++aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags) ++{ ++ opts->x_aarch64_asm_isa_flags = flags; ++ opts->x_aarch64_isa_flags = flags; ++ if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY) ++ opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP); ++} ++ + /* Implement TARGET_HANDLE_OPTION. 
+ This function handles the target specific options for CPU/target selection. + +@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts, + + case OPT_mgeneral_regs_only: + opts->x_target_flags |= MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags); + return true; + + case OPT_mfix_cortex_a53_835769: +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 42276e7ca..015e9d975 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t) + /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD + set. */ + aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags) +- : m_old_isa_flags (aarch64_isa_flags), ++ : m_old_asm_isa_flags (aarch64_asm_isa_flags), + m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY) + { + /* Changing the ISA flags should be enough here. We shouldn't need to + pay the compile-time cost of a full target switch. */ +- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags; + global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags); + } + + aarch64_simd_switcher::~aarch64_simd_switcher () + { + if (m_old_general_regs_only) + global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; +- aarch64_isa_flags = m_old_isa_flags; ++ aarch64_set_asm_isa_flags (m_old_asm_isa_flags); + } + + /* Implement #pragma GCC aarch64 "arm_neon.h". */ +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index ef84df731..86e444a60 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -747,7 +747,7 @@ public: + ~aarch64_simd_switcher (); + + private: +- unsigned long m_old_isa_flags; ++ unsigned long m_old_asm_isa_flags; + bool m_old_general_regs_only; + }; + +@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx, + machine_mode, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + ++void aarch64_set_asm_isa_flags (aarch64_feature_flags); ++ + /* Defined in common/config/aarch64-common.cc. */ ++void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags); + bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index b927a886e..a70e3a6b4 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -696,7 +696,7 @@ static bool + check_required_extensions (location_t location, tree fndecl, + aarch64_feature_flags required_extensions) + { +- auto missing_extensions = required_extensions & ~aarch64_isa_flags; ++ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8cb820767..3e83e48ec 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) + return (int) value / 64; + } + ++/* Set the global aarch64_asm_isa_flags to FLAGS and update ++ aarch64_isa_flags accordingly. 
*/ ++ ++void ++aarch64_set_asm_isa_flags (aarch64_feature_flags flags) ++{ ++ aarch64_set_asm_isa_flags (&global_options, flags); ++} ++ + /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning + and is used to parse the -m{cpu,tune,arch} strings and setup the initial + tuning structs. In particular it must set selected_tune and +- aarch64_isa_flags that define the available ISA features and tuning ++ aarch64_asm_isa_flags that define the available ISA features and tuning + decisions. It must also set selected_arch as this will be used to + output the .arch asm tags for each function. */ + +@@ -18444,7 +18453,7 @@ aarch64_override_options (void) + { + aarch64_feature_flags cpu_isa = 0; + aarch64_feature_flags arch_isa = 0; +- aarch64_isa_flags = 0; ++ aarch64_set_asm_isa_flags (0); + + const struct processor *cpu = NULL; + const struct processor *arch = NULL; +@@ -18484,25 +18493,25 @@ aarch64_override_options (void) + } + + selected_arch = arch->arch; +- aarch64_isa_flags = arch_isa; ++ aarch64_set_asm_isa_flags (arch_isa); + } + else if (cpu) + { + selected_arch = cpu->arch; +- aarch64_isa_flags = cpu_isa; ++ aarch64_set_asm_isa_flags (cpu_isa);
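A minimal sketch of the "funnel all updates through one setter" idea this commit message describes (simplified types and a placeholder flag mask, not the real option machinery): because both flag sets are written in one place, the assembler's view and the compiler's view can never drift apart.

#include <cstdint>

constexpr uint64_t FL_FP_FEATURES = 0xffull << 8;   // placeholder FP-register bits

struct gcc_opts { uint64_t asm_isa_flags, isa_flags; bool general_regs_only; };

void set_asm_isa_flags (gcc_opts &o, uint64_t flags)
{
  o.asm_isa_flags = flags;            // full ISA, as told to the assembler
  o.isa_flags = flags;                // what the compiler itself may use
  if (o.general_regs_only)
    o.isa_flags &= ~FL_FP_FEATURES;   // strip everything needing FP registers
}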
View file
_service:tar_scm:0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch
Added
@@ -0,0 +1,214 @@ +From 3f45bbfe924ffe38832b2ad0050589b9f188422e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 14:44:39 +0800 +Subject: PATCH 124/188 LoongArch: Merge template got_load_tls_{ld/gd/le/ie}. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_load_tls): + Load all types of tls symbols through one function. + (loongarch_got_load_tls_gd): Delete. + (loongarch_got_load_tls_ld): Delete. + (loongarch_got_load_tls_ie): Delete. + (loongarch_got_load_tls_le): Delete. + (loongarch_call_tls_get_addr): Modify the called function name. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@got_load_tls_gd<mode>): Delete. + (@load_tls<mode>): New template. + (@got_load_tls_ld<mode>): Delete. + (@got_load_tls_le<mode>): Delete. + (@got_load_tls_ie<mode>): Delete. +--- + gcc/config/loongarch/loongarch.cc | 47 +++++------------------- + gcc/config/loongarch/loongarch.md | 59 ++++++++++++------------------- + 2 files changed, 30 insertions(+), 76 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 612a9c138..ced7e58c2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2732,36 +2732,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + /* The __tls_get_attr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + +-/* Load an entry from the GOT for a TLS GD access. */ ++/* Load an entry for a TLS access. */ + + static rtx +-loongarch_got_load_tls_gd (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym) + { +- return gen_got_load_tls_gd (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS LD access. */ +- +-static rtx +-loongarch_got_load_tls_ld (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ld (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS IE access. */ +- +-static rtx +-loongarch_got_load_tls_ie (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ie (Pmode, dest, sym); +-} +- +-/* Add in the thread pointer for a TLS LE access. */ +- +-static rtx +-loongarch_got_load_tls_le (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_le (Pmode, dest, sym); ++ return gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2805,14 +2781,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- { +- if (type == SYMBOL_TLSLDM) +- emit_insn (loongarch_got_load_tls_ld (a0, loc)); +- else if (type == SYMBOL_TLSGD) +- emit_insn (loongarch_got_load_tls_gd (a0, loc)); +- else +- gcc_unreachable (); +- } ++ emit_insn (loongarch_load_tls (a0, loc)); + + if (flag_plt) + { +@@ -2949,10 +2918,10 @@ loongarch_legitimize_tls_address (rtx loc) + /* la.tls.ie; tp-relative add. 
*/ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -2975,7 +2944,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3007,11 +2976,11 @@ loongarch_legitimize_tls_address (rtx loc) + + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -3039,7 +3008,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + } + else +- emit_insn (loongarch_got_load_tls_le (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23d8dc126..4f9a92334 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -51,10 +51,7 @@ + UNSPEC_BITREV_8B + + ;; TLS +- UNSPEC_TLS_GD +- UNSPEC_TLS_LD +- UNSPEC_TLS_LE +- UNSPEC_TLS_IE ++ UNSPEC_TLS + + ;; Stack tie + UNSPEC_TIE +@@ -2701,45 +2698,33 @@ + + ;; Thread-Local Storage + +-(define_insn "@got_load_tls_gd<mode>" ++(define_insn "@load_tls<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P + (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_GD)) ++ UNSPEC_TLS)) + "" +- "la.tls.gd\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "@got_load_tls_ld<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LD)) +- "" +- "la.tls.ld\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++{ ++ enum loongarch_symbol_type symbol_type; ++ gcc_assert (loongarch_symbolic_constant_p (operands1, &symbol_type)); + +-(define_insn "@got_load_tls_le<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LE)) +- "" +- "la.tls.le\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++ switch (symbol_type) ++ { ++ case SYMBOL_TLS_LE: ++ return "la.tls.le\t%0,%1"; ++ case SYMBOL_TLS_IE: ++ return "la.tls.ie\t%0,%1"; ++ case SYMBOL_TLSLDM: ++ return "la.tls.ld\t%0,%1"; ++ case SYMBOL_TLSGD: ++ return "la.tls.gd\t%0,%1"; + +-(define_insn "@got_load_tls_ie<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_IE)) +- ""
View file
_service:tar_scm:0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
Added
@@ -0,0 +1,453 @@ +From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:57 +0100 +Subject: PATCH 026/157 BackportSME aarch64: Remove redundant TARGET_* + checks + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3 + +After previous patches, it's possible to remove TARGET_* +options that are redundant due to (IMO) obvious dependencies. + +gcc/ + * config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4) + (TARGET_DOTPROD): Don't depend on TARGET_SIMD. + (TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test. + (TARGET_FP_F16INST): Don't depend on TARGET_FLOAT. + (TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend + on TARGET_SVE. + (TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3) + (TARGET_SVE2_SM4): Don't depend on TARGET_SVE2. + (TARGET_F32MM, TARGET_F64MM): Delete. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard + float macros with just TARGET_FLOAT rather than TARGET_FLOAT + || TARGET_SIMD. + * config/aarch64/aarch64-simd.md (copysign<mode>3): Depend + only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD. + (aarch64_crypto_aes<aes_op>v16qi): Depend only on TARGET_AES, + rather than TARGET_SIMD && TARGET_AES. + (aarch64_crypto_aes<aesmc_op>v16qi): Likewise. + (*aarch64_crypto_aese_fused): Likewise. + (*aarch64_crypto_aesd_fused): Likewise. + (aarch64_crypto_pmulldi): Likewise. + (aarch64_crypto_pmullv2di): Likewise. + (aarch64_crypto_sha1hsi): Likewise TARGET_SHA2. + (aarch64_crypto_sha1hv4si): Likewise. + (aarch64_be_crypto_sha1hv4si): Likewise. + (aarch64_crypto_sha1su1v4si): Likewise. + (aarch64_crypto_sha1<sha1_op>v4si): Likewise. + (aarch64_crypto_sha1su0v4si): Likewise. + (aarch64_crypto_sha256h<sha256_op>v4si): Likewise. + (aarch64_crypto_sha256su0v4si): Likewise. + (aarch64_crypto_sha256su1v4si): Likewise. + (aarch64_crypto_sha512h<sha512_op>qv2di): Likewise TARGET_SHA3. + (aarch64_crypto_sha512su0qv2di): Likewise. + (aarch64_crypto_sha512su1qv2di, eor3q<mode>4): Likewise. + (aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq<mode>4): Likewise. + (aarch64_sm3ss1qv4si): Likewise TARGET_SM4. + (aarch64_sm3tt<sm3tt_op>qv4si): Likewise. + (aarch64_sm3partw<sm3part_op>qv4si): Likewise. + (aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise. + * config/aarch64/aarch64.md (<FLOATUORS:optab>dihf2) + (copysign<GPF:mode>3, copysign<GPF:mode>3_insn) + (xorsign<mode>3): Remove redundant TARGET_FLOAT condition. 
+--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++--------------- + gcc/config/aarch64/aarch64.h | 30 ++++++++-------- + gcc/config/aarch64/aarch64.md | 8 ++--- + 4 files changed, 47 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 18c9b975b..2dfe2b8f8 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + + aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile); + +- if (TARGET_FLOAT || TARGET_SIMD) ++ if (TARGET_FLOAT) + { + builtin_define_with_int_value ("__ARM_FP", 0x0E); + builtin_define ("__ARM_FP16_FORMAT_IEEE"); +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index de92802f5..a47b39281 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -693,7 +693,7 @@ + (match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand") +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + { + rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; +@@ -8352,7 +8352,7 @@ + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w")) + CRYPTO_AES)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aes_op>\\t%0.16b, %2.16b" + (set_attr "type" "crypto_aese") + ) +@@ -8361,7 +8361,7 @@ + (set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI (match_operand:V16QI 1 "register_operand" "w") + CRYPTO_AESMC)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aesmc_op>\\t%0.16b, %1.16b" + (set_attr "type" "crypto_aesmc") + ) +@@ -8380,7 +8380,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESE) + UNSPEC_AESMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8401,7 +8401,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESD) + UNSPEC_AESIMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8415,7 +8415,7 @@ + (unspec:SI (match_operand:SI 1 + "register_operand" "w") + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8425,7 +8425,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 0))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && !BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8435,7 +8435,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 3))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8445,7 +8445,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA1SU1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su1\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8456,7 +8456,7 @@ + (match_operand:SI 2 "register_operand" "w") + 
(match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1<sha1_op>\\t%q0, %s2, %3.4s" + (set_attr "type" "crypto_sha1_slow") + ) +@@ -8467,7 +8467,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA1SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sha1_xor") + ) +@@ -8480,7 +8480,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA256)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256h<sha256_op>\\t%q0, %q2, %3.4s" + (set_attr "type" "crypto_sha256_slow") + ) +@@ -8490,7 +8490,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA256SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256su0\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha256_fast") + ) +@@ -8501,7 +8501,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA256SU1)) +- "TARGET_SIMD && TARGET_SHA2"
View file
_service:tar_scm:0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch
Added
@@ -0,0 +1,453 @@ +From cd177538c2a0f5248e9e7af6247b4d1ba6fe55db Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 19:10:46 +0800 +Subject: PATCH 125/188 LoongArch: Add the macro implementation of + mcmodel=extreme. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p): + Add function declaration. + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend" + is not allowed + (loongarch_load_tls): Added macro support in extreme mode. + (loongarch_call_tls_get_addr): Likewise. + (loongarch_legitimize_tls_address): Likewise. + (loongarch_force_address): Likewise. + (loongarch_legitimize_move): Likewise. + (loongarch_output_mi_thunk): Likewise. + (loongarch_option_override_internal): Remove the code that detects + explicit relocs status. + (loongarch_handle_model_attribute): Likewise. + * config/loongarch/loongarch.md (movdi_symbolic_off64): New template. + * config/loongarch/predicates.md (symbolic_off64_operand): New predicate. + (symbolic_off64_or_reg_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-5.c: New test. + * gcc.target/loongarch/func-call-extreme-5.c: New test. + * gcc.target/loongarch/func-call-extreme-6.c: New test. + * gcc.target/loongarch/tls-extreme-macro.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 110 +++++++++++------- + gcc/config/loongarch/loongarch.md | 48 +++++++- + gcc/config/loongarch/predicates.md | 12 ++ + .../gcc.target/loongarch/attr-model-5.c | 8 ++ + .../loongarch/func-call-extreme-5.c | 7 ++ + .../loongarch/func-call-extreme-6.c | 7 ++ + .../gcc.target/loongarch/tls-extreme-macro.c | 35 ++++++ + 8 files changed, 184 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 5060efbb6..87b94e8b0 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); + extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); + extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); ++extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ced7e58c2..9cfe5bfb2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1932,8 +1932,13 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_PCREL: + case SYMBOL_PCREL64: ++ /* When the code model is extreme, the non-zero offset situation ++ has not been handled well, so it is disabled here now. */ ++ if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64)) ++ return false; ++ /* fall through */ ++ case SYMBOL_PCREL: + /* GAS rejects offsets outside the range -2^31, 2^31-1. 
*/ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + +@@ -2735,9 +2740,15 @@ static GTY (()) rtx loongarch_tls_symbol; + /* Load an entry for a TLS access. */ + + static rtx +-loongarch_load_tls (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type) + { +- return gen_load_tls (Pmode, dest, sym); ++ /* TLS LE gets a 32 or 64 bit offset here, so one register can do it. */ ++ if (type == SYMBOL_TLS_LE) ++ return gen_load_tls (Pmode, dest, sym); ++ ++ return loongarch_symbol_extreme_p (type) ++ ? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode)) ++ : gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2769,8 +2780,6 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp1 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +@@ -2781,7 +2790,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- emit_insn (loongarch_load_tls (a0, loc)); ++ emit_insn (loongarch_load_tls (a0, loc, type)); + + if (flag_plt) + { +@@ -2848,22 +2857,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_EXTREME: + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); +- +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), +- loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) ++ { ++ rtx tmp1 = gen_reg_rtx (Pmode); ++ rtx high = gen_reg_rtx (Pmode); ++ ++ loongarch_emit_move (high, ++ gen_rtx_HIGH (Pmode, ++ loongarch_tls_symbol)); ++ loongarch_emit_move (tmp1, ++ gen_rtx_LO_SUM (Pmode, ++ gen_rtx_REG (Pmode, 0), ++ loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); ++ loongarch_emit_move (dest, ++ gen_rtx_MEM (Pmode, ++ gen_rtx_PLUS (Pmode, ++ high, tmp1))); ++ } ++ else ++ emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, ++ gen_reg_rtx (DImode))); + } + break; + +@@ -2928,8 +2943,6 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp3 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp3, + gen_rtx_REG (Pmode, 0), tmp2)); +@@ -2944,7 +2957,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3001,14 +3014,12 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); + emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); + } + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn 
(loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_LE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3081,7 +3092,7 @@ loongarch_force_address (rtx x, machine_mode mode) + return x; + } + +-static bool ++bool + loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + {
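The patch above (shown truncated) lets the extreme code model work without explicit relocations by routing symbol loads through the new `movdi_symbolic_off64` pattern. A minimal user-level sketch of what this enables; the example is ours, not from the patch, and the option spelling follows the LoongArch `-mexplicit-relocs=` values referenced in the ChangeLog:

```c
/* Build sketch: gcc -O2 -mcmodel=extreme -mexplicit-relocs=none -S t.c
   With this patch the compiler may fall back to assembler macros
   (la.local / la.tls.*) instead of requiring the explicit
   pcalau12i/lu32i.d/lu52i.d relocation sequences.  */
extern int global_counter;
__thread int tls_counter;

int
sum_counters (void)
{
  return global_counter + tls_counter;
}
```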
_service:tar_scm:0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
Added
@@ -0,0 +1,132 @@ +From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 4 Oct 2022 16:39:18 +0100 +Subject: PATCH 027/157 BackportSME aarch64: Define __ARM_FEATURE_RCPC + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630 + +https://github.com/ARM-software/acle/pull/199 adds a new feature +macro for RCPC, for use in things like inline assembly. This patch +adds the associated support to GCC. + +Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a +entry didn't include it. This was probably harmless in practice +since GCC simply ignored the extension until now. (The GAS +definition is OK.) + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro. + * config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC. + * config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1) + (neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define + __ARM_FEATURE_RCPC when appropriate. + +gcc/testsuite/ + * gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests. +--- + gcc/config/aarch64/aarch64-arches.def | 2 +- + gcc/config/aarch64/aarch64-c.cc | 1 + + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 1 + + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++ + 5 files changed, 28 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 9f8246618..5a9eff336 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -33,7 +33,7 @@ + AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) + AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) + AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC)) + AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) + AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) + AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 2dfe2b8f8..4085ad840 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); ++ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 60299160b..b50628d6b 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0 + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). 
*/ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2a9d2d031..19b82b4f3 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char { + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) ++#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index bfb044f5d..307fa3d67 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,6 +248,26 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + ++#pragma GCC target ("arch=armv8.2-a") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.2-a+rcpc") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ ++#pragma GCC target ("+norcpc") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.3-a") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ + int + foo (int a) + { +-- +2.33.0 +
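Since the patch makes `__ARM_FEATURE_RCPC` visible to the preprocessor, user code can guard hand-written RCpc instructions on it, which is the inline-assembly use case the ACLE pull request cites. A hedged sketch; the function and the fallback path are ours:

```c
static inline int
load_acquire_int (const int *p)
{
#ifdef __ARM_FEATURE_RCPC
  int v;
  /* LDAPR: load-acquire with the weaker RCpc ordering.  */
  __asm__ volatile ("ldapr %w0, [%1]" : "=r" (v) : "r" (p) : "memory");
  return v;
#else
  /* Without RCPC, fall back to a normal acquire load (LDAR).  */
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
#endif
}
```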
_service:tar_scm:0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch
Added
@@ -0,0 +1,126 @@ +From 1ccf16353b2be4308c79f3b011cb800bfa6f94f4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 10:46:51 +0800 +Subject: PATCH 126/188 LoongArch: Enable explicit reloc for extreme TLS + GD/LD with -mexplicit-relocs=auto. + +Binutils does not support relaxation using four instructions to obtain +symbol addresses + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + When the code model of the symbol is extreme and -mexplicit-relocs=auto, + the macro instruction loading symbol address is not applicable. + (loongarch_call_tls_get_addr): Adjust code. + (loongarch_legitimize_tls_address): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New test. + * gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 19 +++++++++---------- + .../explicit-relocs-extreme-auto-tls-ld-gd.c | 5 +++++ + .../explicit-relocs-medium-auto-tls-ld-gd.c | 5 +++++ + 3 files changed, 19 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cfe5bfb2..84b949021 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1968,6 +1968,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + ++ /* The linker don't know how to relax accesses in extreme code model. */ ++ if (loongarch_symbol_extreme_p (type)) ++ return true; ++ + switch (type) + { + case SYMBOL_TLS_IE: +@@ -1979,11 +1983,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: +- /* The linker don't know how to relax GOT accesses in extreme +- code model. */ +- if (TARGET_CMODEL_EXTREME) +- return true; +- + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -2772,7 +2771,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) ++ if (loongarch_explicit_relocs_p (type)) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2805,7 +2804,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_MEDIUM: + { + rtx reg = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); + rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +@@ -2841,7 +2840,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_NORMAL: + case CMODEL_MEDIUM: + { +- if (TARGET_EXPLICIT_RELOCS) ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { + rtx high = gen_reg_rtx (Pmode); + loongarch_emit_move (high, +@@ -2935,7 +2934,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +@@ -2992,7 +2991,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..35bd4570a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */ ++/* { dg-final { scan-assembler-not "la.tls.\lg\d" { target tls_native } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..47bffae8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler-not "la.global" { target tls_native } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 +
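The new tests include a shared source file (`explicit-relocs-auto-tls-ld-gd.c`) that is not shown in this revision; a self-contained sketch of the same check, with the DejaGnu directives taken from the added test and the TLS access body assumed by us:

```c
/* { dg-do compile } */
/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */

__thread int x __attribute__ ((tls_model ("global-dynamic")));

int
get_x (void)
{
  return x;
}

/* Expect no la.tls.* macro in the output: under the extreme model the
   linker cannot relax the four-instruction address sequence, so
   explicit relocs must be used even with -mexplicit-relocs=auto.  */
```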
_service:tar_scm:0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
Added
@@ -0,0 +1,29 @@
+From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Tue, 20 Feb 2024 11:03:47 +0800
+Subject: [PATCH 028/157] [Backport][SME] Add Ampere-1 and Ampere-1A core
+ definition in aarch64-cores.def
+
+From commit db2f5d661239737157cf131de7d4df1c17d8d88d and
+590a06afbf0e96813b5879742f38f3665512c854
+---
+ gcc/config/aarch64/aarch64-cores.def | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index b50628d6b..f069c81cf 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu
+ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1)
+ 
+ /* Ampere Computing ('\xC0') cores. */
+-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1)
++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1)
++AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1)
+ /* Do not swap around "emag" and "xgene1",
+    this order is required to handle variant correctly. */
+ AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3)
+-- 
+2.33.0
+
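A quick smoke test for the new core entry; the example is ours, and the `cpu=` pragma form is assumed from GCC's documented aarch64 target options:

```c
#pragma GCC target ("cpu=ampere1a")

int
probe (void)
{
  return 0;   /* also reachable via -mcpu=ampere1a on the command line */
}
```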
_service:tar_scm:0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch
Added
@@ -0,0 +1,72 @@ +From d802fd5eb24bba0c274edeea5aff33e794927aaa Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 11:14:00 +0800 +Subject: PATCH 127/188 LoongArch: Added support for loading __get_tls_addr + symbol address using call36. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): + Add support for call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++----- + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 5 +++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 84b949021..0050813df 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2803,17 +2803,27 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_MEDIUM: + { +- rtx reg = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); +- rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +- loongarch_tls_symbol, +- const0_rtx); +- insn = emit_call_insn (call); ++ rtx call; ++ ++ if (HAVE_AS_SUPPORT_CALL36) ++ call = gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx); ++ else ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_insn (gen_pcalau12i (Pmode, reg, ++ loongarch_tls_symbol)); ++ call = gen_call_value_internal_1 (Pmode, v0, reg, ++ loongarch_tls_symbol, ++ const0_rtx); ++ } ++ insn = emit_call_insn (call); + } + else + { ++ rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, loongarch_tls_symbol); + insn = emit_call_insn (gen_call_value_internal (v0, + reg, +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..d1a482083 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && loongarch_call36_support } } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 +
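The intent, restated as a sketch of the added test; the dg directives and the expected instruction come from the patch, while the TLS access body (normally pulled in from the shared `explicit-relocs-auto-tls-ld-gd.c`) is assumed by us:

```c
/* { dg-do compile } */
/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */

__thread int v __attribute__ ((tls_model ("global-dynamic")));

int
get_v (void)
{
  return v;
}

/* With a call36-capable assembler, expect
     pcaddu18i $r1, %call36(__tls_get_addr)
   in place of the pcalau12i-based two-instruction call sequence.  */
```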
_service:tar_scm:0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
Added
@@ -0,0 +1,968 @@ +From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 7 Sep 2022 10:52:04 +0100 +Subject: PATCH 029/157 BackportSME aarch64: Fix +nosimd handling of FPR + moves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6 + +8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other +moves would handle FPR<-zero inefficiently. This is very much a +niche case at the moment, but something like it becomes more +important with SME streaming mode. + +The si, di and vector tests already passed, they're just included for +completeness. + +We're a bit inconsistent about whether alternatives involving FPRs +are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads +and stores are sometimes * and sometimes fp. + +IMO * makes more sense. FPRs should not be used at all without +TARGET_FLOAT, so TARGET_FLOAT represents the base architecture +when FPRs are enabled. I think it's more useful if non-default +arches represent a genuine restriction. + +gcc/ + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Extend + w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves + in that case. Extend w<-r to w<-Z. + (*mov<HFBF:mode>_aarch64): Likewise, but with Y instead of Z. + (*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not + available. + (define_split): Do not apply the floating-point immediate-to-register + split to zeros, even if MOVI is not available. + +gcc/testsuite/ + * gcc.target/aarch64/movqi_1.c: New test. + * gcc.target/aarch64/movhi_1.c: Likewise. + * gcc.target/aarch64/movsi_1.c: Likewise. + * gcc.target/aarch64/movdi_2.c: Likewise. + * gcc.target/aarch64/movti_2.c: Likewise. + * gcc.target/aarch64/movhf_1.c: Likewise. + * gcc.target/aarch64/movsf_1.c: Likewise. + * gcc.target/aarch64/movdf_1.c: Likewise. + * gcc.target/aarch64/movtf_2.c: Likewise. + * gcc.target/aarch64/movv8qi_1.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.md | 38 ++++---- + gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++ + 12 files changed, 729 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7ee26284d..7267a74d6 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1201,7 +1201,7 @@ + + (define_insn "*mov<mode>_aarch64" + (set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") +- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w")) ++ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" + { +@@ -1225,11 +1225,11 @@ + case 7: + return "str\t%<size>1, %0"; + case 8: +- return "umov\t%w0, %1.<v>0"; ++ return TARGET_SIMD ? "umov\t%w0, %1.<v>0" : "fmov\t%w0, %s1"; + case 9: +- return "dup\t%0.<Vallxd>, %w1"; ++ return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1"; + case 10: +- return "dup\t%<Vetype>0, %1.<v>0"; ++ return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>0" : "fmov\t%s0, %s1"; + default: + gcc_unreachable (); + } +@@ -1237,7 +1237,7 @@ + ;; The "mov_imm" type for CNT is just a placeholder. 
+ (set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, + store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") +- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd") ++ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*") + ) + + (define_expand "mov<mode>" +@@ -1399,14 +1399,15 @@ + + (define_insn "*movti_aarch64" + (set (match_operand:TI 0 +- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") ++ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") + (match_operand:TI 1 +- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w")) ++ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w")) + "(register_operand (operands0, TImode) + || aarch64_reg_or_zero (operands1, TImode))" + "@ + # + movi\\t%0.2d, #0 ++ fmov\t%d0, xzr + # + # + mov\\t%0.16b, %1.16b +@@ -1415,11 +1416,11 @@ + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" +- (set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ ++ (set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ + load_16,store_16,store_16,\ + load_16,store_16") +- (set_attr "length" "8,4,8,8,4,4,4,4,4,4") +- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp") ++ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") ++ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp") + ) + + ;; Split a TImode register-register or register-immediate move into +@@ -1458,16 +1459,19 @@ + ) + + (define_insn "*mov<mode>_aarch64" +- (set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") +- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r")) ++ (set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r") ++ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r")) + "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) + || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + movi\\t%0.4h, #0 + fmov\\t%h0, %w1 + dup\\t%w0.4h, %w1 ++ fmov\\t%s0, %w1 + umov\\t%w0, %1.h0 ++ fmov\\t%w0, %s1 + mov\\t%0.h0, %1.h0 ++ fmov\\t%s0, %s1 + fmov\\t%h0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands1, HImode); + ldr\\t%h0, %1 +@@ -1475,9 +1479,10 @@ + ldrh\\t%w0, %1 + strh\\t%w1, %0 + mov\\t%w0, %w1" +- (set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ +- neon_move,f_loads,f_stores,load_4,store_4,mov_reg") +- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*") ++ (set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc, ++ neon_move,fmov,fconsts,neon_move,f_loads,f_stores, ++ load_4,store_4,mov_reg") ++ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*") + ) + + (define_insn "*movsf_aarch64" +@@ -1530,10 +1535,11 @@ + + (define_split + (set (match_operand:GPF_HF 0 "nonimmediate_operand") +- (match_operand:GPF_HF 1 "general_operand")) ++ (match_operand:GPF_HF 1 "const_double_operand")) + "can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands1, <MODE>mode) + && !aarch64_float_const_representable_p (operands1) ++ && !aarch64_float_const_zero_rtx_p (operands1) + && aarch64_float_const_rtx_p (operands1)" + (const_int 0) + { +diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c +new file mode 100644 +index 000000000..a51ded1d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */
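The new mov*_1.c tests (truncated above) follow a common shape; a reconstructed example in that style, with the register names and the pragma assumed from the commit message rather than copied from the test files:

```c
#pragma GCC target ("+nosimd+fp")

/* A 16-bit FPR-to-FPR move: with +nosimd the backend cannot use
   DUP/UMOV, so this used to ICE and should now emit a 32-bit fmov.  */
void
fpr_to_fpr (void)
{
  register unsigned short q0 asm ("q0");
  register unsigned short q1 asm ("q1");
  asm volatile ("" : "=w" (q0));
  q1 = q0;
  asm volatile ("" :: "w" (q1));
}
```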
_service:tar_scm:0128-LoongArch-Don-t-split-the-instructions-containing-re.patch
Added
@@ -0,0 +1,514 @@ +From 45aace43891ccaef756f2f1356edbb0da676629b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 29 Jan 2024 15:20:07 +0800 +Subject: PATCH 128/188 LoongArch: Don't split the instructions containing + relocs for extreme code model. + +The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for +addressing a symbol to be adjacent. So model them as "one large +instruction", i.e. define_insn, with two output registers. The real +address is the sum of these two registers. + +The advantage of this approach is the RTL passes can still use ldx/stx +instructions to skip an addi.d instruction. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (unspec): Add + UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2. + (la_pcrel64_two_parts): New define_insn. + * config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a + typo in the comment. + (loongarch_call_tls_get_addr): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, use la_pcrel64_two_parts for + addressing the TLS symbol and __tls_get_addr. Emit an REG_EQUAL + note to allow CSE addressing __tls_get_addr. + (loongarch_legitimize_tls_address): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address TLS IE symbols with + la_pcrel64_two_parts. + (loongarch_split_symbol): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address symbols with + la_pcrel64_two_parts. + (loongarch_output_mi_thunk): Clean up unreachable code. If + -mcmodel=extreme -mexplicit-relocs={always,auto}, address the MI + thunks with la_pcrel64_two_parts. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-extreme-1.c (dg-options): + Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i + instruction sequences are not reordered by the compiler. + (NOIPA): Disallow interprocedural optimizations. + * gcc.target/loongarch/func-call-extreme-2.c: Remove the content + duplicated from func-call-extreme-1.c, include it instead. + (dg-options): Likewise. + * gcc.target/loongarch/func-call-extreme-3.c (dg-options): + Likewise. + * gcc.target/loongarch/func-call-extreme-4.c (dg-options): + Likewise. + * gcc.target/loongarch/cmodel-extreme-1.c: New test. + * gcc.target/loongarch/cmodel-extreme-2.c: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-1.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-2.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-3.C: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 131 ++++++++++-------- + gcc/config/loongarch/loongarch.md | 20 +++ + .../loongarch/cmodel-extreme-mi-thunk-1.C | 11 ++ + .../loongarch/cmodel-extreme-mi-thunk-2.C | 6 + + .../loongarch/cmodel-extreme-mi-thunk-3.C | 6 + + .../gcc.target/loongarch/cmodel-extreme-1.c | 18 +++ + .../gcc.target/loongarch/cmodel-extreme-2.c | 7 + + .../loongarch/func-call-extreme-1.c | 14 +- + .../loongarch/func-call-extreme-2.c | 29 +--- + .../loongarch/func-call-extreme-3.c | 2 +- + .../loongarch/func-call-extreme-4.c | 2 +- + 11 files changed, 154 insertions(+), 92 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 0050813df..b8f0291ab 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2733,7 +2733,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + return plus_constant (Pmode, reg, offset); + } + +-/* The __tls_get_attr symbol. */ ++/* The __tls_get_addr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry for a TLS access. */ +@@ -2773,20 +2773,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (loongarch_explicit_relocs_p (type)) + { +- /* Split tls symbol to high and low. */ +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +- high = loongarch_force_temporary (tmp, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); +- emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc)); ++ emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2)); + } + else +- emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ { ++ /* Split tls symbol to high and low. 
*/ ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); ++ ++ high = loongarch_force_temporary (tmp, high); ++ emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ } + } + else + emit_insn (loongarch_load_tls (a0, loc, type)); +@@ -2868,22 +2870,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + { + if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, +- loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, +- gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, + loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ loongarch_emit_move ( ++ dest, ++ gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, ++ part1, ++ part2))); ++ ++ /* Put an REG_EQUAL note here to allow CSE (storing ++ part1 + part2, i.e. the address of tls_get_addr into ++ a saved register and use it for multiple TLS ++ accesses). */ ++ rtx sum = gen_rtx_UNSPEC ( ++ Pmode, gen_rtvec (1, loongarch_tls_symbol), ++ UNSPEC_ADDRESS_FIRST ++ + loongarch_classify_symbol (loongarch_tls_symbol)); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); + } + else + emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, +@@ -2946,24 +2954,30 @@ loongarch_legitimize_tls_address (rtx loc) + dest = gen_reg_rtx (Pmode); + if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { +- tmp3 = gen_reg_rtx (Pmode); +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +- high = loongarch_force_temporary (tmp3, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp3 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp3, +- gen_rtx_REG (Pmode, 0), tmp2)); +- emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); +- emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); ++ gcc_assert (la_opt_explicit_relocs ++ != EXPLICIT_RELOCS_NONE); ++ ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, ++ tmp2)); + emit_move_insn (tmp1, + gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, +- high, tmp3))); ++ part1, ++ part2))); + } + else +- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ { ++ tmp3 = gen_reg_rtx (Pmode); ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); ++ ++ high = loongarch_force_temporary (tmp3, high); ++ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
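The point of modelling the four instructions as one insn is easiest to see in the expected output. Below is our annotation of the ABI-mandated sequence, shown in a comment; the reloc operator names follow the LoongArch psABI and are not compiler output we have verified:

```c
int x;

int
get_x (void)
{
  return x;
  /* Expected shape under -mcmodel=extreme -mexplicit-relocs=auto:
       pcalau12i $t0, %pc_hi20(x)
       addi.d    $t1, $zero, %pc_lo12(x)
       lu32i.d   $t1, %pc64_lo20(x)
       lu52i.d   $t1, $t1, %pc64_hi12(x)
       ldx.w     $a0, $t0, $t1    # add folded into the indexed load
     The four relocated instructions stay adjacent, as the ABI requires,
     but RTL passes can still fold the final add into ldx/stx.  */
}
```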
_service:tar_scm:0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
Added
@@ -0,0 +1,83 @@ +From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 20 Oct 2022 10:37:35 +0100 +Subject: PATCH 030/157 BackportSME aarch64: Commonise some folding code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c + +Add an aarch64_sve::gimple_folder helper for folding calls +to integer constants. SME will make more use of this. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (gimple_folder::fold_to_cstu): New member function. + * config/aarch64/aarch64-sve-builtins.cc + (gimple_folder::fold_to_cstu): Define. + * config/aarch64/aarch64-sve-builtins-base.cc + (svcnt_bhwd_impl::fold): Use it. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++------- + gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++ + gcc/config/aarch64/aarch64-sve-builtins.h | 1 + + 3 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c24c05487..56c9d75e7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -516,9 +516,7 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), +- GET_MODE_NUNITS (m_ref_mode)); +- return gimple_build_assign (f.lhs, count); ++ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); + } + + rtx +@@ -553,10 +551,7 @@ public: + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) +- { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), value); +- return gimple_build_assign (f.lhs, count); +- } ++ return f.fold_to_cstu (value); + + return NULL; + } +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index a70e3a6b4..e168c8334 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance) + return call; + } + ++/* Fold the call to constant VAL. */ ++gimple * ++gimple_folder::fold_to_cstu (poly_uint64 val) ++{ ++ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val)); ++} ++ + /* Fold the call to a PTRUE, taking the element size from type suffix 0. */ + gimple * + gimple_folder::fold_to_ptrue () +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 63d1db776..0d130b871 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -500,6 +500,7 @@ public: + tree load_store_cookie (tree); + + gimple *redirect_call (const function_instance &); ++ gimple *fold_to_cstu (poly_uint64); + gimple *fold_to_pfalse (); + gimple *fold_to_ptrue (); + gimple *fold_to_vl_pred (unsigned int); +-- +2.33.0 +
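The helper's effect is easiest to see from the user side: calls such as `svcntb` fold to integer constants when the vector length is known. An illustrative example of ours, not an upstream test:

```c
/* Build sketch: -O2 -march=armv8.2-a+sve -msve-vector-bits=256  */
#include <stdint.h>
#include <arm_sve.h>

uint64_t
bytes_per_vector (void)
{
  /* With a fixed 256-bit vector length this call folds to the
     constant 32 during gimple folding (via fold_to_cstu).  */
  return svcntb ();
}
```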
_service:tar_scm:0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch
Added
@@ -0,0 +1,173 @@ +From 825847768a29ec9d50e01015167002998150cb27 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Fri, 26 Jan 2024 16:41:11 +0800 +Subject: PATCH 129/188 LoongArch: Adjust cost of vector_stmt that match + multiply-add pattern. + +We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r +failed to vectorize effectively. For this reason, we adjust the cost of +128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit +vectorization. +The experimental results show that after the modification, 549.fotonik3d_r +performance can be improved by 9.77% under the 128-bit vectorization option. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New. + (loongarch_vector_costs::add_stmt_cost): Adjust. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/vect-10.f90: New test. +--- + gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++ + gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++ + 2 files changed, 119 insertions(+) + create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90 + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b8f0291ab..526ea0bcb 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4153,6 +4153,37 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + return 1 << ceil_log2 (uf); + } + ++/* Check if assign stmt rhs op comes from a multiply-add operation. */ ++static bool ++loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) ++{ ++ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); ++ if (!assign) ++ return false; ++ tree_code code = gimple_assign_rhs_code (assign); ++ if (code != PLUS_EXPR && code != MINUS_EXPR) ++ return false; ++ ++ auto is_mul_result = &(int i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info ++ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) ++ return false; ++ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); ++ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) ++ return false; ++ ++ return true; ++ }; ++ ++ return is_mul_result (1) || is_mul_result (2); ++} ++ + unsigned + loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, slp_tree, +@@ -4165,6 +4196,23 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + { + int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, + misalign); ++ if (vectype && stmt_info) ++ { ++ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info)); ++ machine_mode mode = TYPE_MODE (vectype); ++ ++ /* We found through testing that this strategy (the stmt that ++ matches the multiply-add pattern) has positive returns only ++ when applied to the 128-bit vector stmt, so this restriction ++ is currently made. 
*/ ++ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign) ++ { ++ if (!vect_is_reduction (stmt_info) ++ && loongarch_multiply_add_p (m_vinfo, stmt_info)) ++ stmt_cost = 0; ++ } ++ } ++ + retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); + m_costswhere += retval; + +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +new file mode 100644 +index 000000000..b85bc2702 +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +@@ -0,0 +1,71 @@ ++! { dg-do compile } ++! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } } ++ ++MODULE material_mod ++ ++IMPLICIT NONE ++ ++integer, parameter :: dfp = selected_real_kind (13, 99) ++integer, parameter :: rfp = dfp ++ ++PUBLIC Mat_updateE, iepx, iepy, iepz ++ ++PRIVATE ++ ++integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz ++real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz ++integer :: imin, jmin, kmin ++integer, dimension (6) :: Exsize ++integer, dimension (6) :: Eysize ++integer, dimension (6) :: Ezsize ++integer, dimension (6) :: Hxsize ++integer, dimension (6) :: Hysize ++integer, dimension (6) :: Hzsize ++ ++CONTAINS ++ ++SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez) ++ ++integer, intent (in) :: nx, ny, nz ++ ++real (kind = rfp), intent (inout), & ++ dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex ++real (kind = rfp), intent (inout), & ++ dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey ++real (kind = rfp), intent (inout), & ++ dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez ++real (kind = rfp), intent (in), & ++ dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx ++real (kind = rfp), intent (in), & ++ dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize (6)) :: Hy ++real (kind = rfp), intent (in), & ++ dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz ++ ++integer :: i, j, k, mp ++ ++do k = kmin, nz ++ do j = jmin, ny ++ do i = imin, nx ++ mp = iepx (i, j, k) ++ Ex (i, j, k) = Ex (i, j, k) + & ++ Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + & ++ Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k)) ++ ++ mp = iepy (i, j, k) ++ Ey (i, j, k) = Ey (i, j, k) + & ++ Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + & ++ Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k )) ++ ++ mp = iepz (i, j, k) ++ Ez (i, j, k) = Ez (i, j, k) + & ++ Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + & ++ Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k)) ++ end do ++ end do ++end do ++ ++END SUBROUTINE mat_updateE ++ ++END MODULE material_mod ++ ++! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } } +-- +2.43.0 +
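The new test is Fortran; a C analogue (our sketch) of the pattern whose 128-bit vector_stmt cost is zeroed:

```c
/* Build sketch: -O3 -mlsx.  The PLUS feeds on a MULT result, so
   loongarch_multiply_add_p matches and the statement is costed as a
   fused multiply-add rather than two separate vector operations.  */
void
madd (double *restrict a, const double *restrict b,
      const double *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    a[i] += b[i] * c[i];
}
```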
_service:tar_scm:0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
Added
@@ -0,0 +1,49 @@ +From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 7 Nov 2023 15:22:57 +0000 +Subject: PATCH 031/157 BackportSME aarch64: Add a %Z operand modifier + for SVE registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a + +This patch adds a %Z operand modifier that prints registers as SVE z +registers. The SME patches need this, but so do Tamar's patches. +I'm separating this out to unblock those. + +We should probably document the wxbhsdqZ modifiers as +user-facing, but doing that for all of them is a separate patch. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z + modifier for SVE registers. +--- + gcc/config/aarch64/aarch64.cc | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 3e83e48ec..fd1114b52 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11901,6 +11901,10 @@ sizetochar (int size) + 'N': Take the duplicated element in a vector constant + and print the negative of it in decimal. + 'b/h/s/d/q': Print a scalar FP/SIMD register name. ++ 'Z': Same for SVE registers. ('z' was already taken.) ++ Note that it is not necessary to use %Z for operands ++ that have SVE modes. The convention is to use %Z ++ only for non-SVE (or potentially non-SVE) modes. + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. +@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) + case 's': + case 'd': + case 'q': ++ case 'Z': ++ code = TOLOWER (code); + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); +-- +2.33.0 +
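A hand-written illustration of the modifier; the example is ours, requires an SVE-enabled `-march`, and is meant only to show the printing behaviour, not a recommended idiom:

```c
#include <stdint.h>

uint64_t
zeroed (void)
{
  uint64_t r;
  /* The operand has DImode, a non-SVE mode, so plain %0 would print a
     d-register name; %Z0 prints the same register as z<n>.  */
  asm ("mov %Z0.d, #0" : "=w" (r));
  return r;
}
```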
_service:tar_scm:0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch
Added
@@ -0,0 +1,113 @@ +From 99a48268961f05e87f4f9d6f3f22903869f50af7 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 24 Jan 2024 17:19:32 +0800 +Subject: PATCH 130/188 LoongArch: Fix incorrect return type for + frecipe/frsqrte intrinsic functions + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h + (__frecipe_s): Update function return type. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-intrinsic.c: New test. +--- + gcc/config/loongarch/larchintrin.h | 16 +++++----- + .../loongarch/larch-frecipe-intrinsic.c | 30 +++++++++++++++++++ + 2 files changed, 38 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 22035e767..6582dfe49 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -336,38 +336,38 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + #ifdef __loongarch_frecipe + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- __builtin_loongarch_frecipe_s ((float) _1); ++ return (float) __builtin_loongarch_frecipe_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. */ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- __builtin_loongarch_frecipe_d ((double) _1); ++ return (double) __builtin_loongarch_frecipe_d ((double) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- __builtin_loongarch_frsqrte_s ((float) _1); ++ return (float) __builtin_loongarch_frsqrte_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. 
*/ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- __builtin_loongarch_frsqrte_d ((double) _1); ++ return (double) __builtin_loongarch_frsqrte_d ((double) _1); + } + #endif + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +new file mode 100644 +index 000000000..6ce2bde0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +@@ -0,0 +1,30 @@ ++/* Test intrinsics for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe -O2" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++#include <larchintrin.h> ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __frsqrte_d (_1); ++} +-- +2.43.0 +
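The practical consequence, in an example of ours: the result of the intrinsic can now be consumed directly, where the old `void` prototypes silently discarded it.

```c
#include <larchintrin.h>

/* Needs -mfrecipe; mirrors the added test at a smaller scale.  */
float
approx_recip (float x)
{
  return __frecipe_s (x);
}
```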
_service:tar_scm:0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
Added
@@ -0,0 +1,104 @@ +From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 5 Nov 2023 18:28:46 +0000 +Subject: PATCH 032/157 BackportSME mode-switching: Remove unused bbnum + field + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133 + +seginfo had an unused bbnum field, presumably dating from before +BB information was attached directly to insns. + +gcc/ + * mode-switching.cc: Remove unused forward references. + (seginfo): Remove bbnum. + (new_seginfo): Remove associated argument. + (optimize_mode_switching): Update calls accordingly. +--- + gcc/mode-switching.cc | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6e3f1dc65..4cf8f03a0 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. If not see + MODE is the mode this insn must be executed in. + INSN_PTR is the insn to be executed (may be the note that marks the + beginning of a basic block). +- BBNUM is the flow graph basic block this insn occurs in. + NEXT is the next insn in the same basic block. */ + struct seginfo + { + int mode; + rtx_insn *insn_ptr; +- int bbnum; + struct seginfo *next; + HARD_REG_SET regs_live; + }; +@@ -84,11 +82,6 @@ struct bb_info + int mode_in; + }; + +-static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET); +-static void add_seginfo (struct bb_info *, struct seginfo *); +-static void reg_dies (rtx, HARD_REG_SET *); +-static void reg_becomes_live (rtx, const_rtx, void *); +- + /* Clear ode I from entity J in bitmap B. */ + #define clear_mode_bit(b, j, i) \ + bitmap_clear_bit (b, (j * max_num_modes) + i) +@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) + } + + /* Allocate a new BBINFO structure, initialized with the MODE, INSN, +- and basic block BB parameters. ++ and REGS_LIVE parameters. + INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty + basic block; that allows us later to insert instructions in a FIFO-like + manner. */ + + static struct seginfo * +-new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) ++new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET ®s_live) + { + struct seginfo *ptr; + +@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) + ptr = XNEW (struct seginfo); + ptr->mode = mode; + ptr->insn_ptr = insn; +- ptr->bbnum = bb; + ptr->next = NULL; + ptr->regs_live = regs_live; + return ptr; +@@ -604,7 +596,7 @@ optimize_mode_switching (void) + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); + if (ins_pos != BB_END (bb)) + ins_pos = NEXT_INSN (ins_pos); +- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now); ++ ptr = new_seginfo (no_mode, ins_pos, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transpbb->index, j, i); +@@ -622,7 +614,7 @@ optimize_mode_switching (void) + { + any_set_required = true; + last_mode = mode; +- ptr = new_seginfo (mode, insn, bb->index, live_now); ++ ptr = new_seginfo (mode, insn, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transpbb->index, j, i); +@@ -651,7 +643,7 @@ optimize_mode_switching (void) + mark the block as nontransparent. 
*/ + if (!any_set_required) + { +- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now); ++ ptr = new_seginfo (no_mode, BB_END (bb), live_now); + add_seginfo (info + bb->index, ptr); + if (last_mode != no_mode) + for (i = 0; i < no_mode; i++) +-- +2.33.0 +
_service:tar_scm:0131-LoongArch-Fix-an-ODR-violation.patch
Added
@@ -0,0 +1,60 @@
+From 89ebd7012ecf49c60bad8dd018e0aa573b58844b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 2 Feb 2024 05:37:38 +0800
+Subject: [PATCH 131/188] LoongArch: Fix an ODR violation
+
+When bootstrapping GCC 14 with --with-build-config=bootstrap-lto, an ODR
+violation is detected:
+
+    ../../gcc/config/loongarch/loongarch-opts.cc:57: warning:
+    'abi_minimal_isa' violates the C++ One Definition Rule [-Wodr]
+    57 | abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES];
+    ../../gcc/config/loongarch/loongarch-def.cc:186: note:
+    'abi_minimal_isa' was previously declared here
+    186 | abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>,
+    ../../gcc/config/loongarch/loongarch-def.cc:186: note:
+    code may be misoptimized unless '-fno-strict-aliasing' is used
+
+Fix it by adding a proper declaration of abi_minimal_isa into
+loongarch-def.h and removing the ODR-violating local declaration in
+loongarch-opts.cc.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h (abi_minimal_isa): Declare.
+	* config/loongarch/loongarch-opts.cc (abi_minimal_isa): Remove
+	the ODR-violating local declaration.
+---
+ gcc/config/loongarch/loongarch-def.h   | 3 +++
+ gcc/config/loongarch/loongarch-opts.cc | 2 --
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index 28da3ae5f..fdcf43fc7 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -203,5 +203,8 @@ extern loongarch_def_array<loongarch_align, N_TUNE_TYPES>
+   loongarch_cpu_align;
+ extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES>
+   loongarch_cpu_rtx_cost_data;
++extern loongarch_def_array<
++  loongarch_def_array<loongarch_isa, N_ABI_EXT_TYPES>,
++  N_ABI_BASE_TYPES> abi_minimal_isa;
+ 
+ #endif /* LOONGARCH_DEF_H */
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index a2b069d83..2ea3972d1 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -53,8 +53,6 @@ static const int tm_multilib_list[] = { TM_MULTILIB_LIST };
+ static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 };
+ 
+ #define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext])
+-extern "C" const struct loongarch_isa
+-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES];
+ 
+ static inline int
+ is_multilib_enabled (struct loongarch_abi abi)
+-- 
+2.43.0
+
View file
_service:tar_scm:0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
Added
@@ -0,0 +1,311 @@
+From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:54 +0000
+Subject: [PATCH 033/157] [Backport][SME] mode-switching: Tweak the macro/hook
+ documentation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d
+
+I found the documentation for the mode-switching macros/hooks
+a bit hard to follow at first.  This patch tries to add the
+information that I think would have made it easier to understand.
+
+Of course, documentation preferences are personal, and so I could
+be changing something that others understood to something that
+seems impenetrable.
+
+Some notes on specific changes:
+
+- "in an optimizing compilation" didn't seem accurate; the pass
+  is run even at -O0, and often needs to be for correctness.
+
+- "at run time" meant when the compiler was run, rather than when
+  the compiled code was run.
+
+- Removing the list of optional macros isn't a clarification,
+  but it means that upcoming patches don't create an absurdly
+  long list.
+
+- I don't really understand the purpose of TARGET_MODE_PRIORITY,
+  so I mostly left that alone.
+
+gcc/
+	* target.def: Tweak documentation of mode-switching hooks.
+	* doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation.
+	(NUM_MODES_FOR_MODE_SWITCHING): Likewise.
+	* doc/tm.texi: Regenerate.
+---
+ gcc/doc/tm.texi    | 69 ++++++++++++++++++++++++++++------------------
+ gcc/doc/tm.texi.in | 26 +++++++++--------
+ gcc/target.def     | 43 ++++++++++++++++++-----------
+ 3 files changed, 84 insertions(+), 54 deletions(-)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 851d31c18..553aa4cf2 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations:
+ 
+ @defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
+ Define this macro if the port needs extra instructions inserted for mode
+-switching in an optimizing compilation.
++switching.
+ 
+ For an example, the SH4 can perform both single and double precision
+ floating point operations, but to perform a single precision operation,
+@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to
+ be inserted before reload, i.e.@: you cannot put this into instruction emitting
+ or @code{TARGET_MACHINE_DEPENDENT_REORG}.
+ 
+-You can have multiple entities that are mode-switched, and select at run time
+-which entities actually need it.  @code{OPTIMIZE_MODE_SWITCHING} should
+-return nonzero for any @var{entity} that needs mode-switching.
++You can have multiple entities that are mode-switched, some of which might
++only be needed conditionally.  The entities are identified by their index
++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
++of the initializer determining the number of entities.
++
++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
++that needs mode-switching.
++
+ If you define this macro, you also have to define
+ @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
+ @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
+-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
+-are optional.
++The other macros in this section are optional.
+ @end defmac
+ 
+ @defmac NUM_MODES_FOR_MODE_SWITCHING
+ If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
+ initializer for an array of integers.  Each initializer element
+ N refers to an entity that needs mode switching, and specifies the number
+-of different modes that might need to be set for this entity.
+-The position of the initializer in the initializer---starting counting at
++of different modes that are defined for that entity.
++The position of the element in the initializer---starting counting at
+ zero---determines the integer that is used to refer to the mode-switched
+ entity in question.
+-In macros that take mode arguments / yield a mode result, modes are
+-represented as numbers 0 @dots{} N @minus{} 1.  N is used to specify that no mode
+-switch is needed / supplied.
++Modes are represented as numbers 0 @dots{} N @minus{} 1.
++In mode arguments and return values, N either represents an unknown
++mode or ``no mode'', depending on context.
+ @end defmac
+ 
+ @deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live})
+ Generate one or more insns to set @var{entity} to @var{mode}.
+ @var{hard_reg_live} is the set of hard registers live at the point where
+ the insn(s) are to be inserted.  @var{prev_moxde} indicates the mode
+-to switch from.  Sets of a lower numbered entity will be emitted before
++to switch from, or is the number of modes if the previous mode is not
++known.  Sets of a lower numbered entity will be emitted before
+ sets of a higher numbered entity to a mode of the same or lower priority.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
+ @var{entity} is an integer specifying a mode-switched entity.
+-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro
+-to return an integer value not larger than the corresponding element
+-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}
+-must be switched into prior to the execution of @var{insn}.
++If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
++to return the mode that @var{entity} must be switched into prior to the
++execution of @var{insn}, or the number of modes if @var{insn} has no
++such requirement.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
+ @var{entity} is an integer specifying a mode-switched entity.
+-If this macro is defined, it is evaluated for every @var{insn} during mode
+-switching.  It determines the mode that an insn results
+-in (if different from the incoming mode).
++If this hook is defined, it is evaluated for every @var{insn} during mode
++switching.  It returns the mode that @var{entity} is in after @var{insn}
++has been executed.  @var{mode} is the mode that @var{entity} was in
++before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
++
++@var{mode} is equal to the number of modes defined for @var{entity}
++if the mode before @var{insn} is unknown.  The hook should likewise return
++the number of modes if it does not know what mode @var{entity} has after
++@var{insn}.
++
++Not defining the hook is equivalent to returning @var{mode}.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
+-If this macro is defined, it is evaluated for every @var{entity} that
+-needs mode switching.  It should evaluate to an integer, which is a mode
+-that @var{entity} is assumed to be switched to at function entry.
++If this hook is defined, it is evaluated for every @var{entity} that
++needs mode switching.  It should return the mode that @var{entity} is
++guaranteed to be in on entry to the function, or the number of modes
++if there is no such guarantee.
+ If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}
+ must be defined.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity})
+-If this macro is defined, it is evaluated for every @var{entity} that
+-needs mode switching.  It should evaluate to an integer, which is a mode
+-that @var{entity} is assumed to be switched to at function exit.
++If this hook is defined, it is evaluated for every @var{entity} that
++needs mode switching.  It should return the mode that @var{entity} must
++be in on return from the function, or the number of modes if there is no
++such requirement.
+ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
+ must be defined.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
+-This macro specifies the order in which modes for @var{entity}
++This hook specifies the order in which modes for @var{entity}
+ are processed.  0 is the highest priority,
+ @code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.
+-The value of the macro should be an integer designating a mode
++The hook returns an integer designating a mode
+ for @var{entity}.  For any fixed @var{entity}, @code{mode_priority}
+ (@var{entity}, @var{n}) shall be a bijection in 0 @dots{}
+ @code{num_modes_for_mode_switching[@var{entity}] - 1}.
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index ac95cdf7a..9ec11b15c 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations:
+ 
+ @defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
+ Define this macro if the port needs extra instructions inserted for mode
+-switching in an optimizing compilation.
++switching.
+ 
+ For an example, the SH4 can perform both single and double precision
+ floating point operations, but to perform a single precision operation,
+@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to
+ be inserted before reload, i.e.@: you cannot put this into instruction emitting
+ or @code{TARGET_MACHINE_DEPENDENT_REORG}.
+ 
+-You can have multiple entities that are mode-switched, and select at run time
+-which entities actually need it.  @code{OPTIMIZE_MODE_SWITCHING} should
+-return nonzero for any @var{entity} that needs mode-switching.
++You can have multiple entities that are mode-switched, some of which might
++only be needed conditionally.  The entities are identified by their index
++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
++of the initializer determining the number of entities.
++
++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
++that needs mode-switching.
++
+ If you define this macro, you also have to define
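The convention the rewritten documentation spells out, namely that valid modes are 0 ... N-1 and the value N itself doubles as "unknown" or "no mode", is easy to see in a toy re-implementation. The following self-contained C++ sketch (invented names, no GCC internals) walks one block of "insns" and inserts a mode-set wherever the needed mode differs from the current one:

#include <cstdio>
#include <vector>

constexpr int N = 2;         // number of modes for this entity
constexpr int no_mode = N;   // N itself means "no mode" / "unknown"

struct insn { int needed; }; // what TARGET_MODE_NEEDED would return

int main ()
{
  std::vector<insn> block = { {no_mode}, {1}, {1}, {0}, {no_mode} };
  int cur = no_mode;         // mode on entry to the block is unknown
  for (std::size_t i = 0; i < block.size (); ++i)
    {
      int m = block[i].needed;
      if (m != no_mode && m != cur)
        {
          std::printf ("insert set-to-mode-%d before insn %zu\n", m, i);
          cur = m;           // with no after hook, the mode persists
        }
    }
}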
View file
_service:tar_scm:0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch
Added
@@ -0,0 +1,359 @@
+From f4a447bff86c7f5598a7461e353a3c6f4a101ed4 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Fri, 2 Feb 2024 09:42:28 +0800
+Subject: [PATCH 132/188] LoongArch: testsuite: Fix
+ gcc.dg/vect/vect-reduc-mul_{1,2}.c FAIL.
+
+This FAIL was introduced from r14-6908.  The reason is that when merging
+constant vector permutation implementations, the 128-bit matching situation
+was not fully considered.  In fact, the expansion of 128-bit vectors after
+merging only supports value-based 4 elements set shuffle, so this time is a
+complete implementation of the entire 128-bit vector constant permutation,
+and some structural adjustments have also been made to the code.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_expand_vselect): Adjust.
+	(loongarch_expand_vselect_vconcat): Ditto.
+	(loongarch_try_expand_lsx_vshuf_const): New, use vshuf to implement
+	all 128-bit constant permutation situations.
+	(loongarch_expand_lsx_shuffle): Adjust and rename function name.
+	(loongarch_is_imm_set_shuffle): Renamed function name.
+	(loongarch_expand_vec_perm_even_odd): Function forward declaration.
+	(loongarch_expand_vec_perm_even_odd_1): Add implement for 128-bit
+	extract-even and extract-odd permutations.
+	(loongarch_is_odd_extraction): Delete.
+	(loongarch_is_even_extraction): Ditto.
+	(loongarch_expand_vec_perm_const): Adjust.
+---
+ gcc/config/loongarch/loongarch.cc | 218 ++++++++++++++++++++++--------
+ 1 file changed, 163 insertions(+), 55 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 526ea0bcb..a0e0906af 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8025,7 +8025,8 @@ struct expand_vec_perm_d
+ 
+ static bool
+ loongarch_expand_vselect (rtx target, rtx op0,
+-			  const unsigned char *perm, unsigned nelt)
++			  const unsigned char *perm, unsigned nelt,
++			  bool testing_p)
+ {
+   rtx rperm[MAX_VECT_LEN], x;
+   rtx_insn *insn;
+@@ -8044,6 +8045,9 @@ loongarch_expand_vselect (rtx target, rtx op0,
+       remove_insn (insn);
+       return false;
+     }
++
++  if (testing_p)
++    remove_insn (insn);
+   return true;
+ }
+ 
+@@ -8051,7 +8055,8 @@ loongarch_expand_vselect (rtx target, rtx op0,
+ 
+ static bool
+ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+-				  const unsigned char *perm, unsigned nelt)
++				  const unsigned char *perm, unsigned nelt,
++				  bool testing_p)
+ {
+   machine_mode v2mode;
+   rtx x;
+@@ -8059,7 +8064,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+   if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
+     return false;
+   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
+-  return loongarch_expand_vselect (target, x, perm, nelt);
++  return loongarch_expand_vselect (target, x, perm, nelt, testing_p);
+ }
+ 
+ static tree
+@@ -8315,11 +8320,87 @@ loongarch_set_handled_components (sbitmap components)
+ #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+ #undef TARGET_ASM_ALIGNED_DI_OP
+ #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
++
++/* Use the vshuf instruction to implement all 128-bit constant vector
++   permuatation.  */
++
++static bool
++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
++{
++  int i;
++  rtx target, op0, op1, sel, tmp;
++  rtx rperm[MAX_VECT_LEN];
++
++  if (GET_MODE_SIZE (d->vmode) == 16)
++    {
++      target = d->target;
++      op0 = d->op0;
++      op1 = d->one_vector_p ? d->op0 : d->op1;
++
++      if (GET_MODE (op0) != GET_MODE (op1)
++	  || GET_MODE (op0) != GET_MODE (target))
++	return false;
++
++      if (d->testing_p)
++	return true;
++
++      for (i = 0; i < d->nelt; i += 1)
++	rperm[i] = GEN_INT (d->perm[i]);
++
++      if (d->vmode == E_V2DFmode)
++	{
++	  sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
++	  tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	}
++      else if (d->vmode == E_V4SFmode)
++	{
++	  sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
++	  tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	}
++      else
++	{
++	  sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
++	  emit_move_insn (d->target, sel);
++	}
++
++      switch (d->vmode)
++	{
++	case E_V2DFmode:
++	  emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
++	  break;
++	case E_V2DImode:
++	  emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
++	  break;
++	case E_V4SFmode:
++	  emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
++	  break;
++	case E_V4SImode:
++	  emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
++	  break;
++	case E_V8HImode:
++	  emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
++	  break;
++	case E_V16QImode:
++	  emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
++	  break;
++	default:
++	  break;
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Construct (set target (vec_select op0 (parallel selector))) and
+-   return true if that's a valid instruction in the active ISA.  */
++   return true if that's a valid instruction in the active ISA.
++   In fact, it matches the special constant vector with repeated
++   4-element sets.  */
+ 
+ static bool
+-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
++loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d)
+ {
+   rtx x, elts[MAX_VECT_LEN];
+   rtvec v;
+@@ -8338,6 +8419,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
+   if (!loongarch_const_vector_shuffle_set_p (x, d->vmode))
+     return false;
+ 
++  if (d->testing_p)
++    return true;
++
+   x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
+   x = gen_rtx_SET (d->target, x);
+ 
+@@ -8350,6 +8434,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
+   return true;
+ }
+ 
++static bool
++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *);
++
++/* Try to match and expand all kinds of 128-bit const vector permutation
++   cases.  */
++
++static bool
++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
++{
++  if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16)
++    return false;
++
++  if (loongarch_is_imm_set_shuffle (d))
++    return true;
++
++  if (loongarch_expand_vec_perm_even_odd (d))
++    return true;
++
++  return loongarch_try_expand_lsx_vshuf_const (d);
++}
++
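All of the routines added or renamed above follow the vec_perm_const contract in which d->testing_p asks only whether the permutation could be expanded, without emitting anything. A hypothetical, self-contained C++ reduction of that idiom; the perm_desc struct is invented, and the real expand_vec_perm_d carries much more state:

#include <cstdio>

struct perm_desc { int nelt; bool testing_p; };

static bool
try_expand_perm (perm_desc *d)
{
  if (d->nelt != 2 && d->nelt != 4)   // cheap structural checks first
    return false;
  if (d->testing_p)
    return true;                      // caller only asked "can you?"
  std::printf ("emitting a %d-element shuffle\n", d->nelt);
  return true;
}

int main ()
{
  perm_desc query  = { 4, true };     // dry run: nothing is generated
  perm_desc expand = { 4, false };    // real run: instructions emitted
  return try_expand_perm (&query) && try_expand_perm (&expand) ? 0 : 1;
}

Keeping the validity test and the expansion in one function, as the patch does, ensures the two can never drift apart.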
View file
_service:tar_scm:0133-Backport-SME-mode-switching-Add-note-problem.patch
Added
@@ -0,0 +1,35 @@
+From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:55 +0000
+Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b
+
+optimize_mode_switching uses REG_DEAD notes to track register
+liveness, but it failed to tell DF to calculate up-to-date notes.
+
+Noticed by inspection.  I don't have a testcase that fails
+because of this.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Call
+	df_note_add_problem.
+---
+ gcc/mode-switching.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4cf8f03a0..2a9f98793 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -540,6 +540,7 @@ optimize_mode_switching (void)
+	pre_exit = create_pre_exit (n_entities, entity_map, num_modes);
+     }
+ 
++  df_note_add_problem ();
+   df_analyze ();
+ 
+   /* Create the bitmap vectors.  */
+-- 
+2.33.0
+
View file
_service:tar_scm:0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch
Added
@@ -0,0 +1,72 @@
+From 6364467c68ac1ee2b54b866f462fb670a43029fa Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 2 Feb 2024 08:51:08 +0800
+Subject: [PATCH 133/188] LoongArch: Avoid out-of-bounds access in
+ loongarch_symbol_insns
+
+We call loongarch_symbol_insns with mode = MAX_MACHINE_MODE sometimes.
+But in loongarch_symbol_insns:
+
+    if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
+      return 0;
+
+And LSX_SUPPORTED_MODE_P is defined as:
+
+    #define LSX_SUPPORTED_MODE_P(MODE) \
+      (ISA_HAS_LSX \
+       && GET_MODE_SIZE (MODE) == UNITS_PER_LSX_REG ... ...
+
+GET_MODE_SIZE is expanded to a call to mode_to_bytes, which is defined:
+
+    ALWAYS_INLINE poly_uint16
+    mode_to_bytes (machine_mode mode)
+    {
+    #if GCC_VERSION >= 4001
+      return (__builtin_constant_p (mode)
+	      ? mode_size_inline (mode) : mode_size[mode]);
+    #else
+      return mode_size[mode];
+    #endif
+    }
+
+There is an assertion in mode_size_inline:
+
+    gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);
+
+Note that NUM_MACHINE_MODES = MAX_MACHINE_MODE (emitted by genmodes.cc),
+thus if __builtin_constant_p (mode) is evaluated true (it happens when
+GCC is bootstrapped with LTO+PGO), the assertion will be triggered and
+cause an ICE.  OTOH if __builtin_constant_p (mode) is evaluated false,
+mode_size[mode] is still an out-of-bound array access (the length of
+the mode_size array is NUM_MACHINE_MODES).
+
+So we shouldn't call LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P with
+MAX_MACHINE_MODE in loongarch_symbol_insns.  This is very similar to a
+MIPS bug PR98491 fixed by me about 3 years ago.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_symbol_insns): Do not
+	use LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P if mode is
+	MAX_MACHINE_MODE.
+---
+ gcc/config/loongarch/loongarch.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index a0e0906af..d23b09cc5 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -2004,7 +2004,8 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
+ {
+   /* LSX LD.* and ST.* cannot support loading symbols via an immediate
+      operand.  */
+-  if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
++  if (mode != MAX_MACHINE_MODE
++      && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
+     return 0;
+ 
+   switch (type)
+-- 
+2.43.0
+
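The underlying bug pattern is generic: an enum's past-the-end value used as a "don't care" sentinel must never reach array-indexing code. A small self-contained C++ sketch of the guard; the names and the mode_size table here are illustrative, not GCC's:

#include <cassert>
#include <cstdio>

enum machine_mode_t { M_QI, M_HI, M_SI, M_DI, MAX_MODE };

static const unsigned mode_size[MAX_MODE] = { 1, 2, 4, 8 };

static unsigned
size_of (machine_mode_t mode)
{
  // Guard first: indexing mode_size[MAX_MODE] would read out of
  // bounds, and an assert-checking build would abort on it.
  if (mode == MAX_MODE)
    return 0;
  assert (mode < MAX_MODE);
  return mode_size[mode];
}

int main ()
{
  std::printf ("%u %u\n", size_of (M_SI), size_of (MAX_MODE));
}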
View file
_service:tar_scm:0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
Added
@@ -0,0 +1,90 @@
+From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:55 +0000
+Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadractic list
+ operation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4
+
+add_seginfo chained insn information to the end of a list
+by starting at the head of the list.  This patch avoids the
+quadraticness by keeping track of the tail pointer.
+
+gcc/
+	* mode-switching.cc (add_seginfo): Replace head pointer with
+	a pointer to the tail pointer.
+	(optimize_mode_switching): Update calls accordingly.
+---
+ gcc/mode-switching.cc | 24 ++++++++----------------
+ 1 file changed, 8 insertions(+), 16 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 2a9f98793..6a13951c9 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
+ }
+ 
+ /* Add a seginfo element to the end of a list.
+-   HEAD is a pointer to the list beginning.
++   TAIL is a pointer to the list's null terminator.
+    INFO is the structure to be linked in.  */
+ 
+ static void
+-add_seginfo (struct bb_info *head, struct seginfo *info)
++add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info)
+ {
+-  struct seginfo *ptr;
+-
+-  if (head->seginfo == NULL)
+-    head->seginfo = info;
+-  else
+-    {
+-      ptr = head->seginfo;
+-      while (ptr->next != NULL)
+-	ptr = ptr->next;
+-      ptr->next = info;
+-    }
++  **tail_ptr = info;
++  *tail_ptr = &info->next;
+ }
+ 
+ /* Record in LIVE that register REG died.  */
+@@ -573,6 +564,7 @@ optimize_mode_switching (void)
+      Also compute the initial transparency settings.  */
+   FOR_EACH_BB_FN (bb, cfun)
+     {
++      struct seginfo **tail_ptr = &info[bb->index].seginfo;
+       struct seginfo *ptr;
+       int last_mode = no_mode;
+       bool any_set_required = false;
+@@ -598,7 +590,7 @@ optimize_mode_switching (void)
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+	      ptr = new_seginfo (no_mode, ins_pos, live_now);
+-	      add_seginfo (info + bb->index, ptr);
++	      add_seginfo (&tail_ptr, ptr);
+	      for (i = 0; i < no_mode; i++)
+		clear_mode_bit (transp[bb->index], j, i);
+@@ -616,7 +608,7 @@ optimize_mode_switching (void)
+		  any_set_required = true;
+		  last_mode = mode;
+		  ptr = new_seginfo (mode, insn, live_now);
+-		  add_seginfo (info + bb->index, ptr);
++		  add_seginfo (&tail_ptr, ptr);
+		  for (i = 0; i < no_mode; i++)
+		    clear_mode_bit (transp[bb->index], j, i);
+	        }
+@@ -645,7 +637,7 @@ optimize_mode_switching (void)
+      if (!any_set_required)
+	{
+	  ptr = new_seginfo (no_mode, BB_END (bb), live_now);
+-	  add_seginfo (info + bb->index, ptr);
++	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+	    for (i = 0; i < no_mode; i++)
+	      clear_mode_bit (transp[bb->index], j, i);
+-- 
+2.33.0
+
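The new add_seginfo is the textbook tail-pointer technique: keep a pointer to the list's null terminator so each append is O(1) instead of rescanning from the head. The same idea in a standalone C++ toy (the node type is made up; the two-line body mirrors the patched function exactly):

#include <cstdio>

struct node { int value; node *next; };

static void
append (node ***tail_ptr, node *n)
{
  **tail_ptr = n;          // link n where the null terminator was
  *tail_ptr = &n->next;    // n's next field is the new terminator
}

int main ()
{
  node *head = nullptr;
  node **tail = &head;
  node a{1, nullptr}, b{2, nullptr}, c{3, nullptr};
  append (&tail, &a);
  append (&tail, &b);
  append (&tail, &c);
  for (node *p = head; p; p = p->next)
    std::printf ("%d\n", p->value);
}

Appending n elements head-first costs O(n^2) pointer chases in total; with the tail pointer it is O(n), which is what the commit message means by avoiding the quadraticness.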
View file
_service:tar_scm:0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch
Added
@@ -0,0 +1,122 @@
+From 659b51a6aed60f389009eff1e04645a47e55a45c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 3 Feb 2024 03:16:14 +0800
+Subject: [PATCH 134/188] LoongArch: Fix wrong LSX FP vector negation
+
+We expanded (neg x) to (minus const0 x) for LSX FP vectors, this is
+wrong because -0.0 is not 0 - 0.0.  This causes some Python tests to
+fail when Python is built with LSX enabled.
+
+Use the vbitrevi.{d/w} instructions to simply reverse the sign bit
+instead.  We are already doing this for LASX and now we can unify them
+into simd.md.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md (neg<mode:FLSX>2): Remove the
+	incorrect expand.
+	* config/loongarch/simd.md (simdfmt_as_i): New define_mode_attr.
+	(elmsgnbit): Likewise.
+	(neg<mode:FVEC>2): New define_insn.
+	* config/loongarch/lasx.md (negv4df2, negv8sf2): Remove as they
+	are now instantiated in simd.md.
+---
+ gcc/config/loongarch/lasx.md | 16 ----------------
+ gcc/config/loongarch/lsx.md  | 11 -----------
+ gcc/config/loongarch/simd.md | 18 ++++++++++++++++++
+ 3 files changed, 18 insertions(+), 27 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 946811e1a..38f35bad6 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3028,22 +3028,6 @@
+   [(set_attr "type" "simd_logic")
+    (set_attr "mode" "V8SF")])
+ 
+-(define_insn "negv4df2"
+-  [(set (match_operand:V4DF 0 "register_operand" "=f")
+-	(neg:V4DF (match_operand:V4DF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.d\t%u0,%u1,63"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V4DF")])
+-
+-(define_insn "negv8sf2"
+-  [(set (match_operand:V8SF 0 "register_operand" "=f")
+-	(neg:V8SF (match_operand:V8SF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.w\t%u0,%u1,31"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V8SF")])
+-
+ (define_insn "xvfmadd<mode>4"
+   [(set (match_operand:FLASX 0 "register_operand" "=f")
+	 (fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 612377436..d5aa3f46f 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -728,17 +728,6 @@
+   DONE;
+ })
+ 
+-(define_expand "neg<mode>2"
+-  [(set (match_operand:FLSX 0 "register_operand")
+-	(neg:FLSX (match_operand:FLSX 1 "register_operand")))]
+-  "ISA_HAS_LSX"
+-{
+-  rtx reg = gen_reg_rtx (<MODE>mode);
+-  emit_move_insn (reg, CONST0_RTX (<MODE>mode));
+-  emit_insn (gen_sub<mode>3 (operands[0], reg, operands[1]));
+-  DONE;
+-})
+-
+ (define_expand "lsx_vrepli<mode>"
+   [(match_operand:ILSX 0 "register_operand")
+    (match_operand 1 "const_imm10_operand")]
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 8ac1d75a8..00d4c7831 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -85,12 +85,21 @@
+ (define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
+				   (V4SF "w") (V8SF "w")])
+ 
++;; Suffix for integer mode in LSX or LASX instructions to operating FP
++;; vectors using integer vector operations.
++(define_mode_attr simdfmt_as_i [(V2DF "d") (V4DF "d")
++				(V4SF "w") (V8SF "w")])
++
+ ;; Size of vector elements in bits.
+ (define_mode_attr elmbits [(V2DI "64") (V4DI "64")
+			    (V4SI "32") (V8SI "32")
+			    (V8HI "16") (V16HI "16")
+			    (V16QI "8") (V32QI "8")])
+ 
++;; The index of sign bit in FP vector elements.
++(define_mode_attr elmsgnbit [(V2DF "63") (V4DF "63")
++			     (V4SF "31") (V8SF "31")])
++
+ ;; This attribute is used to form an immediate operand constraint using
+ ;; "const_<bitimm>_operand".
+ (define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
+@@ -457,6 +466,15 @@
+   DONE;
+ })
+ 
++;; FP negation.
++(define_insn "neg<mode>2"
++  [(set (match_operand:FVEC 0 "register_operand" "=f")
++	(neg:FVEC (match_operand:FVEC 1 "register_operand" "f")))]
++  ""
++  "<x>vbitrevi.<simdfmt_as_i>\t%<wu>0,%<wu>1,<elmsgnbit>"
++  [(set_attr "type" "simd_logic")
++   (set_attr "mode" "<MODE>")])
++
+ ; The LoongArch SX Instructions.
+ (include "lsx.md")
+ 
+-- 
+2.43.0
+
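The reason (minus const0 x) is wrong shows up even in scalar code: IEEE 754 negation must map +0.0 to -0.0, but 0.0 - 0.0 yields +0.0, whereas flipping bit 63 (or bit 31 for float), which is what vbitrevi.d/vbitrevi.w do per lane, is exact. A scalar C++ demonstration of the difference; this is an illustration of the semantics, not the vector code:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

static double
neg_by_bitflip (double x)
{
  std::uint64_t bits;
  std::memcpy (&bits, &x, sizeof bits);
  bits ^= std::uint64_t (1) << 63;   // reverse the sign bit (bit 63)
  std::memcpy (&x, &bits, sizeof bits);
  return x;
}

int main ()
{
  double zero = 0.0;
  std::printf ("0.0 - 0.0     = %g\n", 0.0 - zero);            // +0
  std::printf ("bitflip (0.0) = %g\n", neg_by_bitflip (zero)); // -0
  std::printf ("signbits: %d vs %d\n",
	       std::signbit (0.0 - zero),
	       std::signbit (neg_by_bitflip (zero)));
}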
View file
_service:tar_scm:0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
Added
@@ -0,0 +1,136 @@
+From 194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed
+ to the emit hook
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436
+
+optimize_mode_switching passes an entity's current mode (if known)
+to the emit hook.  However, the mode that it passed ignored the
+effect of the after hook.  Instead, the mode for the first emit
+call in a block was taken from the incoming mode, whereas the
+mode for each subsequent emit call was taken from the result
+of the previous call.
+
+The previous pass through the insns already calculated the
+correct mode, so this patch records it in the seginfo structure.
+(There was a 32-bit hole on 64-bit hosts, so this doesn't increase
+the size of the structure for them.)
+
+gcc/
+	* mode-switching.cc (seginfo): Add a prev_mode field.
+	(new_seginfo): Take and initialize the prev_mode.
+	(optimize_mode_switching): Update calls accordingly.
+	Use the recorded modes during the emit phase, rather than
+	computing one on the fly.
+---
+ gcc/mode-switching.cc | 30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 6a13951c9..584cd4f67 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
+    NEXT is the next insn in the same basic block.  */
+ struct seginfo
+ {
++  int prev_mode;
+   int mode;
+   rtx_insn *insn_ptr;
+   struct seginfo *next;
+@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
+   return need_commit;
+ }
+ 
+-/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
+-   and REGS_LIVE parameters.
++/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE,
++   INSN, and REGS_LIVE parameters.
+    INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
+    basic block; that allows us later to insert instructions in a FIFO-like
+    manner.  */
+ 
+ static struct seginfo *
+-new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
++new_seginfo (int prev_mode, int mode, rtx_insn *insn,
++	     const HARD_REG_SET &regs_live)
+ {
+   struct seginfo *ptr;
+ 
+   gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn)
+	       || insn == BB_END (NOTE_BASIC_BLOCK (insn)));
+   ptr = XNEW (struct seginfo);
++  ptr->prev_mode = prev_mode;
+   ptr->mode = mode;
+   ptr->insn_ptr = insn;
+   ptr->next = NULL;
+@@ -589,7 +592,7 @@ optimize_mode_switching (void)
+	      gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+-	      ptr = new_seginfo (no_mode, ins_pos, live_now);
++	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+	      for (i = 0; i < no_mode; i++)
+		clear_mode_bit (transp[bb->index], j, i);
+@@ -605,12 +608,12 @@ optimize_mode_switching (void)
+ 
+	      if (mode != no_mode && mode != last_mode)
+		{
+-		  any_set_required = true;
+-		  last_mode = mode;
+-		  ptr = new_seginfo (mode, insn, live_now);
++		  ptr = new_seginfo (last_mode, mode, insn, live_now);
+		  add_seginfo (&tail_ptr, ptr);
+		  for (i = 0; i < no_mode; i++)
+		    clear_mode_bit (transp[bb->index], j, i);
++		  any_set_required = true;
++		  last_mode = mode;
+		}
+ 
+	      if (targetm.mode_switching.after)
+@@ -636,7 +639,7 @@ optimize_mode_switching (void)
+	 mark the block as nontransparent.  */
+      if (!any_set_required)
+	{
+-	  ptr = new_seginfo (no_mode, BB_END (bb), live_now);
++	  ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+	    for (i = 0; i < no_mode; i++)
+@@ -777,9 +780,9 @@ optimize_mode_switching (void)
+   FOR_EACH_BB_FN (bb, cfun)
+     {
+	struct seginfo *ptr, *next;
+-	int cur_mode = bb_info[j][bb->index].mode_in;
++	struct seginfo *first = bb_info[j][bb->index].seginfo;
+ 
+-	for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
++	for (ptr = first; ptr; ptr = next)
+	  {
+	    next = ptr->next;
+	    if (ptr->mode != no_mode)
+@@ -789,14 +792,15 @@ optimize_mode_switching (void)
+		rtl_profile_for_bb (bb);
+		start_sequence ();
+ 
++		int cur_mode = (ptr == first && ptr->prev_mode == no_mode
++				? bb_info[j][bb->index].mode_in
++				: ptr->prev_mode);
++
+		targetm.mode_switching.emit (entity_map[j], ptr->mode,
+					     cur_mode, ptr->regs_live);
+		mode_set = get_insns ();
+		end_sequence ();
+ 
+-		/* modes kill each other inside a basic block.  */
+-		cur_mode = ptr->mode;
+-
+		/* Insert MODE_SET only if it is nonempty.  */
+		if (mode_set != NULL_RTX)
+		  {
+-- 
+2.33.0
+
View file
_service:tar_scm:0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch
Added
@@ -0,0 +1,30 @@
+From 539eb7639eeda8ea43149032f6aa724e5d46017c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 5 Feb 2024 16:23:20 +0800
+Subject: [PATCH 135/188] LoongArch: Fix wrong return value type of
+ __iocsrrd_h.
+
+gcc/ChangeLog:
+
+	* config/loongarch/larchintrin.h (__iocsrrd_h): Modify the
+	function return value type to unsigned short.
+---
+ gcc/config/loongarch/larchintrin.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
+index 6582dfe49..046e042fd 100644
+--- a/gcc/config/loongarch/larchintrin.h
++++ b/gcc/config/loongarch/larchintrin.h
+@@ -268,7 +268,7 @@ __iocsrrd_b (unsigned int _1)
+ 
+ /* Assembly instruction format:	rd, rj.  */
+ /* Data types in instruction templates:  UHI, USI.  */
+-extern __inline unsigned char
++extern __inline unsigned short
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __iocsrrd_h (unsigned int _1)
+ {
+-- 
+2.43.0
+
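The one-line type change matters because iocsrrd.h reads a 16-bit value: returning it through an 8-bit function type silently discards the high byte. A self-contained C++ sketch of the failure mode, with the raw CSR read faked and all names invented:

#include <cstdio>

static unsigned int raw_iocsr_read (unsigned int) { return 0xabcd; }

static unsigned char  read_h_wrong   (unsigned int a) { return raw_iocsr_read (a); }
static unsigned short read_h_correct (unsigned int a) { return raw_iocsr_read (a); }

int main ()
{
  std::printf ("wrong:   0x%x\n", read_h_wrong (0));    // 0xcd, truncated
  std::printf ("correct: 0x%x\n", read_h_correct (0));  // 0xabcd
}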
View file
_service:tar_scm:0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
Added
@@ -0,0 +1,103 @@
+From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of
+ transparency
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02
+
+For a given block, an entity is either transparent for
+all modes or for none.  Each update to the transparency set
+therefore used a loop like:
+
+      for (i = 0; i < no_mode; i++)
+	clear_mode_bit (transp[bb->index], j, i);
+
+This patch instead starts out with a bit-per-block bitmap
+and updates the main bitmap at the end.
+
+This isn't much of a simplification on its own.  The main
+purpose is to simplify later patches.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Initially
+	compute transparency in a bit-per-block bitmap.
+---
+ gcc/mode-switching.cc | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 584cd4f67..4d2b9e284 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -555,6 +555,8 @@ optimize_mode_switching (void)
+   bitmap_vector_clear (antic, last_basic_block_for_fn (cfun));
+   bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
+ 
++  auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
++
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int e = entity_map[j];
+       struct bb_info *info = bb_info[j];
+       rtx_insn *insn;
+ 
++      bitmap_ones (transp_all);
++
+      /* Determine what the first use (if any) need for a mode of entity E is.
+	 This will be the mode that is anticipatable for this block.
+	 Also compute the initial transparency settings.  */
+@@ -594,8 +598,7 @@ optimize_mode_switching (void)
+		ins_pos = NEXT_INSN (ins_pos);
+	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+-	      for (i = 0; i < no_mode; i++)
+-		clear_mode_bit (transp[bb->index], j, i);
++	      bitmap_clear_bit (transp_all, bb->index);
+	    }
+	}
+ 
+@@ -610,8 +613,7 @@ optimize_mode_switching (void)
+		{
+		  ptr = new_seginfo (last_mode, mode, insn, live_now);
+		  add_seginfo (&tail_ptr, ptr);
+-		  for (i = 0; i < no_mode; i++)
+-		    clear_mode_bit (transp[bb->index], j, i);
++		  bitmap_clear_bit (transp_all, bb->index);
+		  any_set_required = true;
+		  last_mode = mode;
+		}
+@@ -642,8 +644,7 @@ optimize_mode_switching (void)
+	  ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+-	    for (i = 0; i < no_mode; i++)
+-	      clear_mode_bit (transp[bb->index], j, i);
++	    bitmap_clear_bit (transp_all, bb->index);
+	}
+      }
+      if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+	{
+@@ -666,8 +667,7 @@ optimize_mode_switching (void)
+	       an extra check in make_preds_opaque.  We also
+	       need this to avoid confusing pre_edge_lcm when
+	       antic is cleared but transp and comp are set.  */
+-	    for (i = 0; i < no_mode; i++)
+-	      clear_mode_bit (transp[bb->index], j, i);
++	    bitmap_clear_bit (transp_all, bb->index);
+ 
+	    /* Insert a fake computing definition of MODE into entry
+	       blocks which compute no mode.  This represents the mode on
+@@ -687,6 +687,9 @@ optimize_mode_switching (void)
+ 
+      FOR_EACH_BB_FN (bb, cfun)
+	{
++	  if (!bitmap_bit_p (transp_all, bb->index))
++	    clear_mode_bit (transp[bb->index], j, m);
++
+	  if (info[bb->index].seginfo->mode == m)
+	    set_mode_bit (antic[bb->index], j, m);
+ 
+-- 
+2.33.0
+
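The restructuring rests on the observation that transparency is per-block, not per-(block, mode), so a one-bit-per-block set can be maintained cheaply during the scan and expanded once at the end. A standard-library approximation of the before/after shape, with std::vector<bool> standing in for GCC's sbitmap and arbitrary sizes:

#include <cstdio>
#include <vector>

int main ()
{
  const int n_blocks = 4, no_mode = 3;
  std::vector<bool> transp_all (n_blocks, true);      // one bit per block
  std::vector<std::vector<bool>> transp
    (n_blocks, std::vector<bool> (no_mode, true));    // bit per (bb, mode)

  transp_all[2] = false;  // block 2 sets the entity's mode somewhere

  // One pass at the end replaces a clear-all-modes loop at every site.
  for (int bb = 0; bb < n_blocks; ++bb)
    if (!transp_all[bb])
      for (int m = 0; m < no_mode; ++m)
        transp[bb][m] = false;

  std::printf ("block 2, mode 0 transparent? %d\n", (int) transp[2][0]);
}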
View file
_service:tar_scm:0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch
Added
@@ -0,0 +1,337 @@
+From 868f56db1101bf679f1b2510b9934a978f503a1e Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 5 Feb 2024 16:53:01 +0800
+Subject: [PATCH 136/188] LoongArch: Remove redundant symbol type conversions
+ in larchintrin.h.
+
+gcc/ChangeLog:
+
+	* config/loongarch/larchintrin.h (__movgr2fcsr): Remove redundant
+	symbol type conversions.
+	(__cacop_d): Likewise.
+	(__cpucfg): Likewise.
+	(__asrtle_d): Likewise.
+	(__asrtgt_d): Likewise.
+	(__lddir_d): Likewise.
+	(__ldpte_d): Likewise.
+	(__crc_w_b_w): Likewise.
+	(__crc_w_h_w): Likewise.
+	(__crc_w_w_w): Likewise.
+	(__crc_w_d_w): Likewise.
+	(__crcc_w_b_w): Likewise.
+	(__crcc_w_h_w): Likewise.
+	(__crcc_w_w_w): Likewise.
+	(__crcc_w_d_w): Likewise.
+	(__csrrd_w): Likewise.
+	(__csrwr_w): Likewise.
+	(__csrxchg_w): Likewise.
+	(__csrrd_d): Likewise.
+	(__csrwr_d): Likewise.
+	(__csrxchg_d): Likewise.
+	(__iocsrrd_b): Likewise.
+	(__iocsrrd_h): Likewise.
+	(__iocsrrd_w): Likewise.
+	(__iocsrrd_d): Likewise.
+	(__iocsrwr_b): Likewise.
+	(__iocsrwr_h): Likewise.
+	(__iocsrwr_w): Likewise.
+	(__iocsrwr_d): Likewise.
+	(__frecipe_s): Likewise.
+	(__frecipe_d): Likewise.
+	(__frsqrte_s): Likewise.
+	(__frsqrte_d): Likewise.
+---
+ gcc/config/loongarch/larchintrin.h | 69 ++++++++++++++----------------
+ 1 file changed, 33 insertions(+), 36 deletions(-)
+
+diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
+index 046e042fd..2e94e5612 100644
+--- a/gcc/config/loongarch/larchintrin.h
++++ b/gcc/config/loongarch/larchintrin.h
+@@ -87,13 +87,13 @@ __rdtimel_w (void)
+ /* Assembly instruction format:	fcsr, rj.  */
+ /* Data types in instruction templates:  VOID, UQI, USI.  */
+ #define __movgr2fcsr(/*ui5*/ _1, _2) \
+-  __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2);
++  __builtin_loongarch_movgr2fcsr ((_1), _2);
+ 
+ #if defined __loongarch64
+ /* Assembly instruction format:	ui5, rj, si12.  */
+ /* Data types in instruction templates:  VOID, USI, UDI, SI.  */
+ #define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \
+-  ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3)))
++  __builtin_loongarch_cacop_d ((_1), (_2), (_3))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -104,7 +104,7 @@ extern __inline unsigned int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __cpucfg (unsigned int _1)
+ {
+-  return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1);
++  return __builtin_loongarch_cpucfg (_1);
+ }
+ 
+ #ifdef __loongarch64
+@@ -114,7 +114,7 @@ extern __inline void
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __asrtle_d (long int _1, long int _2)
+ {
+-  __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2);
++  __builtin_loongarch_asrtle_d (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rj, rk.  */
+@@ -123,7 +123,7 @@ extern __inline void
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __asrtgt_d (long int _1, long int _2)
+ {
+-  __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2);
++  __builtin_loongarch_asrtgt_d (_1, _2);
+ }
+ #endif
+ 
+@@ -131,7 +131,7 @@ __asrtgt_d (long int _1, long int _2)
+ /* Assembly instruction format:	rd, rj, ui5.  */
+ /* Data types in instruction templates:  DI, DI, UQI.  */
+ #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \
+-  ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2)))
++  __builtin_loongarch_lddir_d ((_1), (_2))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -140,7 +140,7 @@ __asrtgt_d (long int _1, long int _2)
+ /* Assembly instruction format:	rj, ui5.  */
+ /* Data types in instruction templates:  VOID, DI, UQI.  */
+ #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \
+-  ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2)))
++  __builtin_loongarch_ldpte_d ((_1), (_2))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -151,7 +151,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_b_w (char _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2);
++  return __builtin_loongarch_crc_w_b_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -160,7 +160,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_h_w (short _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2);
++  return __builtin_loongarch_crc_w_h_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -169,7 +169,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_w_w (int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2);
++  return __builtin_loongarch_crc_w_w_w (_1, _2);
+ }
+ 
+ #ifdef __loongarch64
+@@ -179,7 +179,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_d_w (long int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2);
++  return __builtin_loongarch_crc_w_d_w (_1, _2);
+ }
+ #endif
+ 
+@@ -189,7 +189,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_b_w (char _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_b_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -198,7 +198,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_h_w (short _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_h_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -207,7 +207,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_w_w (int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_w_w (_1, _2);
+ }
+ 
+ #ifdef __loongarch64
+@@ -217,44 +217,41 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_d_w (long int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_d_w (_1, _2);
+ }
+ #endif
+ 
+ /* Assembly instruction format:	rd, ui14.  */
+ /* Data types in instruction templates:  USI, USI.  */
+ #define __csrrd_w(/*ui14*/ _1) \
+-  ((unsigned int) __builtin_loongarch_csrrd_w ((_1)))
++  __builtin_loongarch_csrrd_w ((_1))
+ 
+ /* Assembly instruction format:	rd, ui14.  */
+ /* Data types in instruction templates:  USI, USI, USI.  */
+ #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \
+-  ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2)))
++  __builtin_loongarch_csrwr_w ((_1), (_2))
+ 
+ /* Assembly instruction format:	rd, rj, ui14.  */
+ /* Data types in instruction templates:  USI, USI, USI, USI.  */
+ #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \
+-  ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \
View file
_service:tar_scm:0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
Added
@@ -0,0 +1,92 @@
+From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:57 +0000
+Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit
+ handling
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e
+
+An entity isn't transparent in a block that requires a specific mode.
+optimize_mode_switching took that into account for normal insns,
+but didn't for the exit block.  Later patches misbehaved because
+of this.
+
+In contrast, an entity was correctly marked as non-transparent
+in the entry block, but the reasoning seemed a bit convoluted.
+It also referred to a function that no longer exists.
+Since KILL = ~TRANSP, the entity is by definition not transparent
+in a block that defines the entity, so I think we can make it so
+without comment.
+
+Finally, the exit handling was nested in the entry handling,
+but that doesn't seem necessary.  A target could say that an
+entity is undefined on entry but must be defined on return,
+on a "be liberal in what you accept, be conservative in what
+you do" principle.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Mark the exit
+	block as nontransparent if it requires a specific mode.
+	Handle the entry and exit mode as sibling rather than nested
+	concepts.  Remove outdated comment.
+---
+ gcc/mode-switching.cc | 34 +++++++++++++++++-------------------
+ 1 file changed, 15 insertions(+), 19 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4d2b9e284..4761c2ff0 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -649,34 +649,30 @@ optimize_mode_switching (void)
+	}
+      if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+	{
+-	  int mode = targetm.mode_switching.entry (e);
+-
+	  info[post_entry->index].mode_out =
+	    info[post_entry->index].mode_in = no_mode;
+-	  if (pre_exit)
+-	    {
+-	      info[pre_exit->index].mode_out =
+-		info[pre_exit->index].mode_in = no_mode;
+-	    }
+ 
++	  int mode = targetm.mode_switching.entry (e);
+	  if (mode != no_mode)
+	    {
+-	      bb = post_entry;
+-
+-	      /* By always making this nontransparent, we save
+-		 an extra check in make_preds_opaque.  We also
+-		 need this to avoid confusing pre_edge_lcm when
+-		 antic is cleared but transp and comp are set.  */
+-	      bitmap_clear_bit (transp_all, bb->index);
+-
+	      /* Insert a fake computing definition of MODE into entry
+		 blocks which compute no mode.  This represents the mode on
+		 entry.  */
+-	      info[bb->index].computing = mode;
++	      info[post_entry->index].computing = mode;
++	      bitmap_clear_bit (transp_all, post_entry->index);
++	    }
+ 
+-	      if (pre_exit)
+-		info[pre_exit->index].seginfo->mode =
+-		  targetm.mode_switching.exit (e);
++	  if (pre_exit)
++	    {
++	      info[pre_exit->index].mode_out =
++		info[pre_exit->index].mode_in = no_mode;
++
++	      int mode = targetm.mode_switching.exit (e);
++	      if (mode != no_mode)
++		{
++		  info[pre_exit->index].seginfo->mode = mode;
++		  bitmap_clear_bit (transp_all, pre_exit->index);
++		}
+	    }
+	}
+ 
+-- 
+2.33.0
+
View file
_service:tar_scm:0137-LoongArch-When-checking-whether-the-assembler-suppor.patch
Added
@@ -0,0 +1,54 @@
+From 3580ce2b8c57967117e55af48beba0aaa6257e8b Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 21 Feb 2024 11:17:14 +0800
+Subject: [PATCH 137/188] LoongArch: When checking whether the assembler
+ supports conditional branch relaxation, add compilation parameter
+ "--fatal-warnings" to the assembler.
+
+In binutils 2.40 and earlier versions, only a warning will be reported
+when a relocation immediate value is out of bounds.  As a result,
+the value of the macro HAVE_AS_COND_BRANCH_RELAXATION will also be
+defined as 1 when the assembler does not support conditional branch
+relaxation.  Therefore, add the compilation option "--fatal-warnings"
+to avoid this problem.
+
+gcc/ChangeLog:
+
+	* configure: Regenerate.
+	* configure.ac: Add the parameter "--fatal-warnings" to the
+	assembler when checking whether it supports conditional
+	branch relaxation.
+---
+ gcc/configure    | 2 +-
+ gcc/configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index eecfe60d6..f31395017 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28947,7 +28947,7 @@ else
+     nop
+     .endr
+   beq $a0,$a1,a' > conftest.s
+-  if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
++  if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o conftest.s >&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index d1032440d..35f2c657f 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5349,7 +5349,7 @@ x:
+	     [Define if your assembler supports -mrelax option.])])
+ gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
+   gcc_cv_as_loongarch_cond_branch_relax,
+-  [],
++  [--fatal-warnings],
+   [a:
+   .rept 32769
+   nop
+-- 
+2.43.0
+
View file
_service:tar_scm:0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
Added
@@ -0,0 +1,93 @@
+From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:57 +0000
+Subject: [PATCH 039/157] [Backport][SME] mode-switching: Allow targets to set
+ the mode for EH handlers
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa
+
+The mode-switching pass already had hooks to say what mode
+an entity is in on entry to a function and what mode it must
+be in on return.  For SME, we also want to say what mode an
+entity is guaranteed to be in on entry to an exception handler.
+
+gcc/
+	* target.def (mode_switching.eh_handler): New hook.
+	* doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook.
+	* doc/tm.texi: Regenerate.
+	* mode-switching.cc (optimize_mode_switching): Use eh_handler
+	to get the mode on entry to an exception handler.
+---
+ gcc/doc/tm.texi       | 6 ++++++
+ gcc/doc/tm.texi.in    | 2 ++
+ gcc/mode-switching.cc | 5 ++++-
+ gcc/target.def        | 7 +++++++
+ 4 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 553aa4cf2..4788b3f7a 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
+ must be defined.
+ @end deftypefn
+ 
++@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity})
++If this hook is defined, it should return the mode that @var{entity} is
++guaranteed to be in on entry to an exception handler, or the number of modes
++if there is no such guarantee.
++@end deftypefn
++
+ @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
+ This hook specifies the order in which modes for @var{entity}
+ are processed.  0 is the highest priority,
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index 9ec11b15c..ad343504f 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context.
+ 
+ @hook TARGET_MODE_EXIT
+ 
++@hook TARGET_MODE_EH_HANDLER
++
+ @hook TARGET_MODE_PRIORITY
+ 
+ @node Target Attributes
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4761c2ff0..9a6ba6cca 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -596,7 +596,10 @@ optimize_mode_switching (void)
+	      gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+-	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
++	      if (bb_has_eh_pred (bb)
++		  && targetm.mode_switching.eh_handler)
++		last_mode = targetm.mode_switching.eh_handler (e);
++	      ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+	      bitmap_clear_bit (transp_all, bb->index);
+diff --git a/gcc/target.def b/gcc/target.def
+index b87b0f927..bbb482de6 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
+ must be defined.",
+  int, (int entity), NULL)
+ 
++DEFHOOK
++(eh_handler,
++ "If this hook is defined, it should return the mode that @var{entity} is\n\
++guaranteed to be in on entry to an exception handler, or the number of modes\n\
++if there is no such guarantee.",
++ int, (int entity), NULL)
++
+ DEFHOOK
+ (priority,
+ "This hook specifies the order in which modes for @var{entity}\n\
+-- 
+2.33.0
+
View file
_service:tar_scm:0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch
Added
@@ -0,0 +1,49 @@
+From e6968eb62b2a0adc7ef591594240582630adfc61 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 21 Feb 2024 23:54:53 +0800
+Subject: [PATCH 138/188] LoongArch: Don't falsely claim gold supported in
+ toplevel configure
+
+The gold linker has never been ported to LoongArch (and it seems
+unlikely to be ported in the future as the new architectures are
+focusing on lld and/or mold for fast linkers).
+
+ChangeLog:
+
+	* configure.ac (ENABLE_GOLD): Remove loongarch*-*-* from target
+	list.
+	* configure: Regenerate.
+---
+ configure    | 2 +-
+ configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/configure b/configure
+index 81b4a3cec..ebdca8c62 100755
+--- a/configure
++++ b/configure
+@@ -3058,7 +3058,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+	configdirs="$configdirs gold"
+	if test x${ENABLE_GOLD} = xdefault; then
+	  default_ld=gold
+diff --git a/configure.ac b/configure.ac
+index 9f8dbd319..4f45fd2ba 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -353,7 +353,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+	configdirs="$configdirs gold"
+	if test x${ENABLE_GOLD} = xdefault; then
+	  default_ld=gold
+-- 
+2.43.0
+
View file
_service:tar_scm:0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
Added
@@ -0,0 +1,211 @@ +From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: PATCH 040/157 BackportSME mode-switching: Pass set of live + registers to the needed hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218 + +The emit hook already takes the set of live hard registers as input. +This patch passes it to the needed hook too. SME uses this to +optimise the mode choice based on whether state is live or dead. + +The main caller already had access to the required info, but the +special handling of return values did not. + +gcc/ + * target.def (mode_switching.needed): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + * config/epiphany/mode-switch-use.cc (insert_uses): Likewise. + * config/i386/i386.cc (ix86_mode_needed): Likewise. + * config/riscv/riscv.cc (riscv_mode_needed): Likewise. + * config/sh/sh.cc (sh_mode_needed): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. + (create_pre_exit): Likewise, using the DF simulate functions + to calculate the required information. +--- + gcc/config/epiphany/epiphany-protos.h | 4 +++- + gcc/config/epiphany/epiphany.cc | 2 +- + gcc/config/epiphany/mode-switch-use.cc | 2 +- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 4 ++-- + gcc/doc/tm.texi | 5 +++-- + gcc/mode-switching.cc | 14 ++++++++++++-- + gcc/target.def | 5 +++-- + 8 files changed, 26 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index 61b63234e..d463e5483 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode, + #endif + extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); +-extern int epiphany_mode_needed (int entity, rtx_insn *insn); ++#ifdef HARD_CONST ++extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++#endif + extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index f8c049340..be0fbc68c 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority) + } + + int +-epiphany_mode_needed (int entity, rtx_insn *insn) ++epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + enum attr_fp_mode mode; + +diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc +index 887550a33..cacb1ce5a 100644 +--- a/gcc/config/epiphany/mode-switch-use.cc ++++ b/gcc/config/epiphany/mode-switch-use.cc +@@ -58,7 +58,7 @@ insert_uses (void) + { + if (!INSN_P (insn)) + continue; +- mode = epiphany_mode_needed (e, insn); ++ mode = epiphany_mode_needed (e, insn, {}); + if (mode == no_mode) + continue; + if (target_insert_mode_switch_use) +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 60f3296b0..4d591d217 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ 
-14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) + prior to the execution of insn. */ + + static int +-ix86_mode_needed (int entity, rtx_insn *insn) ++ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 03e1c04ec..85e83e12e 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *); + static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); +-static int sh_mode_needed (int, rtx_insn *); ++static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); + static int sh_mode_after (int, int, rtx_insn *); + static int sh_mode_entry (int); + static int sh_mode_exit (int); +@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, + } + + static int +-sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) ++sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + { + return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE; + } +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 4788b3f7a..d8ac6c4d6 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before + sets of a higher numbered entity to a mode of the same or lower priority. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook + to return the mode that @var{entity} must be switched into prior to the + execution of @var{insn}, or the number of modes if @var{insn} has no +-such requirement. ++such requirement. @var{regs_live} contains the set of hard registers ++that are live before @var{insn}. 
+ @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 9a6ba6cca..6bbda5058 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + && GET_CODE (PATTERN (last_insn)) == USE + && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG) + { ++ auto_bitmap live; ++ df_simulate_initialize_backwards (src_bb, live); ++ + int ret_start = REGNO (ret_reg); + int nregs = REG_NREGS (ret_reg); + int ret_end = ret_start + nregs; +@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + bool forced_late_switch = false; + rtx_insn *before_return_copy; + ++ df_simulate_one_insn_backwards (src_bb, last_insn, live); ++ + do + { + rtx_insn *return_copy = PREV_INSN (last_insn); +@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + int copy_start, copy_num; + int j; + ++ df_simulate_one_insn_backwards (src_bb, return_copy, live); ++ + if (NONDEBUG_INSN_P (return_copy)) + { + /* When using SJLJ exceptions, the call to the +@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + the case for floating point on SH4 - then it might + be set by an arithmetic operation that needs a + different mode than the exit block. */ ++ HARD_REG_SET hard_regs_live; ++ REG_SET_TO_HARD_REG_SET (hard_regs_live, live); + for (j = n_entities - 1; j >= 0; j--) + { + int e = entity_map[j]; + int mode = +- targetm.mode_switching.needed (e, return_copy); ++ targetm.mode_switching.needed (e, return_copy, ++ hard_regs_live); + + if (mode != num_modes[e] + && mode != targetm.mode_switching.exit (e)) +@@ -609,7 +619,7 @@ optimize_mode_switching (void) + { + if (INSN_P (insn)) + { +- int mode = targetm.mode_switching.needed (e, insn); ++ int mode = targetm.mode_switching.needed (e, insn, live_now); + rtx link; + + if (mode != no_mode && mode != last_mode) +diff --git a/gcc/target.def b/gcc/target.def +index bbb482de6..06a52bdaf 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7003,8 +7003,9 @@ DEFHOOK + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ + to return the mode that @var{entity} must be switched into prior to the\n\
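For orientation, a minimal sketch of how a target hook might use the new regs_live parameter (hypothetical entity, modes and register number; this fragment only compiles inside GCC and is not part of the patch):

/* Illustrative modes and state register for a made-up entity.  */
enum { MODE_OFF, MODE_ON, N_MODES };
#define STATE_REGNUM 1

static int
example_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn,
		     HARD_REG_SET regs_live)
{
  if (!INSN_P (insn))
    return N_MODES;	/* number of modes: no requirement */
  /* Only require the expensive mode while the state register is live.  */
  return TEST_HARD_REG_BIT (regs_live, STATE_REGNUM) ? MODE_ON : MODE_OFF;
}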
View file
_service:tar_scm:0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch
Added
@@ -0,0 +1,56 @@ +From 7a4761a31454f999331e8aa5f831e26e249c4295 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 25 Feb 2024 20:40:41 +0800 +Subject: [PATCH 139/188] LoongArch: NFC: Deduplicate crc instruction defines + +Introduce an iterator for UNSPEC_CRC and UNSPEC_CRCC to make the next +change easier. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (CRC): New define_int_iterator. + (crc): New define_int_attr. + (loongarch_crc_w_<size>_w, loongarch_crcc_w_<size>_w): Unify + into ... + (loongarch_<crc>_w_<size>_w): ... here. +--- + gcc/config/loongarch/loongarch.md | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9356194fe..b5ad9eada 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4251,24 +4251,16 @@ + + + (define_mode_iterator QHSD [QI HI SI DI]) ++(define_int_iterator CRC [UNSPEC_CRC UNSPEC_CRCC]) ++(define_int_attr crc [(UNSPEC_CRC "crc") (UNSPEC_CRCC "crcc")]) + +-(define_insn "loongarch_crc_w_<size>_w" ++(define_insn "loongarch_<crc>_w_<size>_w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRC))] ++ CRC))] + "" +- "crc.w.<size>.w\t%0,%1,%2" +- [(set_attr "type" "unknown") +- (set_attr "mode" "<MODE>")]) +- +-(define_insn "loongarch_crcc_w_<size>_w" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRCC))] +- "" +- "crcc.w.<size>.w\t%0,%1,%2" ++ "<crc>.w.<size>.w\t%0,%1,%2" + [(set_attr "type" "unknown") + (set_attr "mode" "<MODE>")]) + +-- +2.43.0 +
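As a quick sanity check that the change is indeed NFC, both intrinsics still expand, now through the single iterator-generated pattern (a compile-only sketch; builtin names are the ones defined by the LoongArch port):

/* Expands to crc.w.w.w followed by crcc.w.w.w, both matching
   loongarch_<crc>_w_<size>_w with <crc> = crc / crcc.  */
int
checksum_step (int word, int crc)
{
  int c = __builtin_loongarch_crc_w_w_w (word, crc);
  return __builtin_loongarch_crcc_w_w_w (word, c);
}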
View file
_service:tar_scm:0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
Added
@@ -0,0 +1,177 @@ +From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: [PATCH 041/157] [Backport][SME] mode-switching: Pass the set of live + registers to the after hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d + +This patch passes the set of live hard registers to the after hook, +like the previous one did for the needed hook. + +gcc/ + * target.def (mode_switching.after): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_after): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + (epiphany_mode_after): Likewise. + * config/i386/i386.cc (ix86_mode_after): Likewise. + * config/riscv/riscv.cc (riscv_mode_after): Likewise. + * config/sh/sh.cc (sh_mode_after): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. +--- + gcc/config/epiphany/epiphany-protos.h | 3 ++- + gcc/config/epiphany/epiphany.cc | 5 +++-- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 5 +++-- + gcc/doc/tm.texi | 4 +++- + gcc/mode-switching.cc | 8 ++++---- + gcc/target.def | 4 +++- + 7 files changed, 19 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index d463e5483..6326b7e80 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); + #ifdef HARD_CONST + extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET); + #endif +-extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); + extern bool epiphany_is_interrupt_p (tree); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index be0fbc68c..62636b1ec 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + return 2; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) +- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); ++ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {}); + /* Fall through. */ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: +@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit) + } + + int +-epiphany_mode_after (int entity, int last_mode, rtx_insn *insn) ++epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET) + { + /* We have too few call-saved registers to hope to keep the masks across + calls. */ +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 4d591d217..593185fa6 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) + /* Return the mode that an insn results in.
*/ + + static int +-ix86_mode_after (int entity, int mode, rtx_insn *insn) ++ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 85e83e12e..74d61c43b 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); + static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); +-static int sh_mode_after (int, int, rtx_insn *); ++static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET); + static int sh_mode_entry (int); + static int sh_mode_exit (int); + static int sh_mode_priority (int entity, int n); +@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + } + + static int +-sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) ++sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn, ++ HARD_REG_SET) + { + if (TARGET_HITACHI && recog_memoized (insn) >= 0 && + get_attr_fp_set (insn) != FP_SET_NONE) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d8ac6c4d6..7fce485b2 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10289,12 +10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers + that are live before @var{insn}. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If this hook is defined, it is evaluated for every @var{insn} during mode + switching. It returns the mode that @var{entity} is in after @var{insn} + has been executed. @var{mode} is the mode that @var{entity} was in + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. ++@var{regs_live} is the set of hard registers that are live after @var{insn} ++has been executed. + + @var{mode} is equal to the number of modes defined for @var{entity} + if the mode before @var{insn} is unknown. The hook should likewise return +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6bbda5058..4f0445894 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -631,10 +631,6 @@ optimize_mode_switching (void) + last_mode = mode; + } + +- if (targetm.mode_switching.after) +- last_mode = targetm.mode_switching.after (e, last_mode, +- insn); +- + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD) +@@ -644,6 +640,10 @@ optimize_mode_switching (void) + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), &live_now); ++ ++ if (targetm.mode_switching.after) ++ last_mode = targetm.mode_switching.after (e, last_mode, ++ insn, live_now); + } + } + +diff --git a/gcc/target.def b/gcc/target.def +index 06a52bdaf..67c20bbb0 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\ + switching. It returns the mode that @var{entity} is in after @var{insn}\n\ + has been executed. 
@var{mode} is the mode that @var{entity} was in\n\ + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ ++@var{regs_live} is the set of hard registers that are live after @var{insn}\n\ ++has been executed.\n\ + \n\ + @var{mode} is equal to the number of modes defined for @var{entity}\n\ + if the mode before @var{insn} is unknown. The hook should likewise return\n\ +@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\ + @var{insn}.\n\ + \n\ + Not defining the hook is equivalent to returning @var{mode}.", +- int, (int entity, int mode, rtx_insn *insn), NULL) ++ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL) + + DEFHOOK + (entry, +-- +2.33.0 +
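A matching sketch for the after hook, using the same hypothetical names as the needed-hook sketch earlier (illustrative only; compiles only inside GCC):

static int
example_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn,
		    HARD_REG_SET regs_live)
{
  /* Once the state register is dead after INSN, the entity can be
     treated as switched off again.  */
  if (INSN_P (insn) && !TEST_HARD_REG_BIT (regs_live, STATE_REGNUM))
    return MODE_OFF;
  return mode;		/* equivalent to not defining the hook */
}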
View file
_service:tar_scm:0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch
Added
@@ -0,0 +1,70 @@ +From 946f9153a5d813301b05fb56a75e2c7ce22a6c2a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 25 Feb 2024 20:44:34 +0800 +Subject: [PATCH 140/188] LoongArch: Remove unneeded sign extension after + crc/crcc instructions + +The specification of crc/crcc instructions is clear that the output is +sign-extended to GRLEN. Add a define_insn to tell the compiler this +fact and allow it to remove the unneeded sign extension on crc/crcc +output. As crc/crcc instructions are usually used in a tight loop, +this should produce a significant performance gain. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md + (loongarch_<crc>_w_<size>_w_extended): New define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/crc-sext.c: New test. +--- + gcc/config/loongarch/loongarch.md | 11 +++++++++++ + gcc/testsuite/gcc.target/loongarch/crc-sext.c | 13 +++++++++++++ + 2 files changed, 24 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/crc-sext.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b5ad9eada..248ad12bb 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4264,6 +4264,17 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "<MODE>")]) + ++(define_insn "loongarch_<crc>_w_<size>_w_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] ++ CRC)))] ++ "TARGET_64BIT" ++ "<crc>.w.<size>.w\t%0,%1,%2" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "<MODE>")]) ++ + ;; With normal or medium code models, if the only use of a pc-relative + ;; address is for loading or storing a value, then relying on linker + ;; relaxation is not better than emitting the machine instruction directly. +diff --git a/gcc/testsuite/gcc.target/loongarch/crc-sext.c b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +new file mode 100644 +index 000000000..9ade5a8e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**my_crc: ++** crc.w.d.w \$r4,\$r4,\$r5 ++** jr \$r1 ++*/ ++int my_crc(long long dword, int crc) ++{ ++ return __builtin_loongarch_crc_w_d_w(dword, crc); ++} +-- +2.43.0 +
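The effect is easiest to see on a caller that widens the 32-bit result; under the new pattern the widening should fold into the instruction itself (a hedged illustration of the commit's claim, compile at -O2 on a 64-bit target):

/* Previously the implicit SI -> DI sign extension here cost an extra
   instruction; now combine matches loongarch_<crc>_w_<size>_w_extended
   and crc.w.w.w alone should suffice.  */
long long
crc_widen (int word, int crc)
{
  return __builtin_loongarch_crc_w_w_w (word, crc);
}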
View file
_service:tar_scm:0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
Added
@@ -0,0 +1,56 @@ +From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:59 +0000 +Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux + fields + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9 + +The pass used the edge aux field to record which mode change +should happen on the edge, with -1 meaning "none". It's more +convenient for later patches to leave aux zero for "none", +and use numbers based at 1 to record a change. + +gcc/ + * mode-switching.cc (commit_mode_sets): Use 1-based edge aux values. +--- + gcc/mode-switching.cc | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 4f0445894..89a8494c6 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) + for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--) + { + edge eg = INDEX_EDGE (edge_list, ed); +- int mode; + +- if ((mode = (int)(intptr_t)(eg->aux)) != -1) ++ if (eg->aux) + { ++ int mode = (int) (intptr_t) eg->aux - 1; + HARD_REG_SET live_at_edge; + basic_block src_bb = eg->src; + int cur_mode = info[src_bb->index].mode_out; +@@ -727,14 +727,14 @@ optimize_mode_switching (void) + { + edge eg = INDEX_EDGE (edge_list, ed); + +- eg->aux = (void *)(intptr_t)-1; ++ eg->aux = (void *) (intptr_t) 0; + + for (i = 0; i < no_mode; i++) + { + int m = targetm.mode_switching.priority (entity_map[j], i); + if (mode_bit_p (inserted, j, m)) + { +- eg->aux = (void *)(intptr_t)m; ++ eg->aux = (void *) (intptr_t) (m + 1); + break; + } + } +-- +2.33.0 +
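Restated as a pair of helpers, the new encoding is (illustrative only, not code from the patch):

#include <stdint.h>

/* aux == 0 means "no mode change on this edge";
   aux == m + 1 schedules a change to mode m.  */
static void *
encode_edge_mode (int m)
{
  return (void *) (intptr_t) (m + 1);
}

static int
decode_edge_mode (void *aux)	/* only meaningful when aux != NULL */
{
  return (int) (intptr_t) aux - 1;
}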
View file
_service:tar_scm:0141-LoongArch-Allow-s9-as-a-register-alias.patch
Added
@@ -0,0 +1,45 @@ +From a74a85ed5f5b00018553d614b4dc57eb1dd5f5ee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 25 Jan 2024 23:49:13 +0800 +Subject: [PATCH 141/188] LoongArch: Allow s9 as a register alias + +The psABI allows using s9 as an alias of r22. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (ADDITIONAL_REGISTER_NAMES): Add + s9 as an alias of r22. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/regname-fp-s9.c: New test. +--- + gcc/config/loongarch/loongarch.h | 1 + + gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 3 +++ + 2 files changed, 4 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 15261fdc0..8bcdb8729 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -931,6 +931,7 @@ typedef struct { + { "t8", 20 + GP_REG_FIRST }, \ + { "x", 21 + GP_REG_FIRST }, \ + { "fp", 22 + GP_REG_FIRST }, \ ++ { "s9", 22 + GP_REG_FIRST }, \ + { "s0", 23 + GP_REG_FIRST }, \ + { "s1", 24 + GP_REG_FIRST }, \ + { "s2", 25 + GP_REG_FIRST }, \ +diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +new file mode 100644 +index 000000000..d2e3b80f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +@@ -0,0 +1,3 @@ ++/* { dg-do compile } */ ++register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */ ++register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */ +-- +2.43.0 +
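A one-line compile-only check of the new spelling (mirrors the added test; hedged, since the exact diagnostics depend on the surrounding code):

/* Accepted after this patch: "s9" binds to r22, the same register
   that "fp" names.  */
register long frame_base __asm__ ("s9");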
View file
_service:tar_scm:0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
Added
@@ -0,0 +1,337 @@ +From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:59 +0000 +Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a + target-configurable confluence operator + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27 + +The mode-switching pass assumed that all of an entity's modes +were mutually exclusive. However, the upcoming SME changes +have an entity with some overlapping modes, so that there is +sometimes a "superunion" mode that contains two given modes. +We can use this relationship to pass something more helpful than +"don't know" to the emit hook. + +This patch adds a new hook that targets can use to specify +a mode confluence operator. + +With mutually exclusive modes, it's possible to compute a block's +incoming and outgoing modes by looking at its availability sets. +With the confluence operator, we instead need to solve a full +dataflow problem. + +However, when emitting a mode transition, the upcoming SME use of +mode-switching benefits from having as much information as possible +about the starting mode. Calculating this information is definitely +worth the compile time. + +The dataflow problem is written to work before and after the LCM +problem has been solved. A later patch makes use of this. + +While there (since git blame would ping me for the reindented code), +I used a lambda to avoid the cut-&-pasted loops. + +gcc/ + * target.def (mode_switching.confluence): New hook. + * doc/tm.texi (TARGET_MODE_CONFLUENCE): New @hook. + * doc/tm.texi.in: Regenerate. + * mode-switching.cc (confluence_info): New variable. + (mode_confluence, forward_confluence_n, forward_transfer): New + functions. + (optimize_mode_switching): Use them to calculate mode_in when + TARGET_MODE_CONFLUENCE is defined. +--- + gcc/doc/tm.texi | 16 ++++ + gcc/doc/tm.texi.in | 2 + + gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++------- + gcc/target.def | 17 ++++ + 4 files changed, 186 insertions(+), 28 deletions(-) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 7fce485b2..d7053ec9e 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after + Not defining the hook is equivalent to returning @var{mode}. + @end deftypefn + ++@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2}) ++By default, the mode-switching pass assumes that a given entity's modes ++are mutually exclusive. This means that the pass can only tell ++@code{TARGET_MODE_EMIT} about an entity's previous mode if all ++incoming paths of execution leave the entity in the same state. ++ ++However, some entities might have overlapping, non-exclusive modes, ++so that it is sometimes possible to represent ``mode @var{mode1} or mode ++@var{mode2}'' with something more specific than ``mode not known''. ++If this is true for at least one entity, you should define this hook ++and make it return a mode that includes @var{mode1} and @var{mode2} ++as possibilities. (The mode can include other possibilities too.) ++The hook should return the number of modes if no suitable mode exists ++for the given arguments. ++@end deftypefn ++ + @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) + If this hook is defined, it is evaluated for every @var{entity} that + needs mode switching.
It should return the mode that @var{entity} is +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index ad343504f..d420e62fd 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context. + + @hook TARGET_MODE_AFTER + ++@hook TARGET_MODE_CONFLUENCE ++ + @hook TARGET_MODE_ENTRY + + @hook TARGET_MODE_EXIT +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 89a8494c6..065767902 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + return pre_exit; + } + ++/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY, ++ using NO_MODE to represent an unknown mode if nothing more precise ++ is available. */ ++ ++int ++mode_confluence (int entity, int mode1, int mode2, int no_mode) ++{ ++ if (mode1 == mode2) ++ return mode1; ++ ++ if (mode1 != no_mode ++ && mode2 != no_mode ++ && targetm.mode_switching.confluence) ++ return targetm.mode_switching.confluence (entity, mode1, mode2); ++ ++ return no_mode; ++} ++ ++/* Information for the dataflow problems below. */ ++struct ++{ ++ /* Information about each basic block, indexed by block id. */ ++ struct bb_info *bb_info; ++ ++ /* The entity that we're processing. */ ++ int entity; ++ ++ /* The number of modes defined for the entity, and thus the identifier ++ of the "don't know" mode. */ ++ int no_mode; ++} confluence_info; ++ ++/* Propagate information about any mode change on edge E to the ++ destination block's mode_in. Return true if something changed. ++ ++ The mode_in and mode_out fields use no_mode + 1 to mean "not yet set". */ ++ ++static bool ++forward_confluence_n (edge e) ++{ ++ /* The entry and exit blocks have no useful mode information. */ ++ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK) ++ return false; ++ ++ /* We don't control mode changes across abnormal edges. */ ++ if (e->flags & EDGE_ABNORMAL) ++ return false; ++ ++ /* E->aux is nonzero if we have computed the LCM problem and scheduled ++ E to change the mode to E->aux - 1. Otherwise model the change ++ from the source to the destination. */ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ int no_mode = confluence_info.no_mode; ++ int src_mode = bb_info[e->src->index].mode_out; ++ if (e->aux) ++ src_mode = (int) (intptr_t) e->aux - 1; ++ if (src_mode == no_mode + 1) ++ return false; ++ ++ int dest_mode = bb_info[e->dest->index].mode_in; ++ if (dest_mode == no_mode + 1) ++ { ++ bb_info[e->dest->index].mode_in = src_mode; ++ return true; ++ } ++ ++ int entity = confluence_info.entity; ++ int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode); ++ if (dest_mode == new_mode) ++ return false; ++ ++ bb_info[e->dest->index].mode_in = new_mode; ++ return true; ++} ++ ++/* Update block BB_INDEX's mode_out based on its mode_in. Return true if ++ something changed. */ ++ ++static bool ++forward_transfer (int bb_index) ++{ ++ /* The entry and exit blocks have no useful mode information. */ ++ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK) ++ return false; ++ ++ /* Only propagate through a block if the entity is transparent.
*/ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ if (bb_info[bb_index].computing != confluence_info.no_mode ++ || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in) ++ return false; ++ ++ bb_info[bb_index].mode_out = bb_info[bb_index].mode_in; ++ return true; ++} ++ + /* Find all insns that need a particular mode setting, and insert the + necessary mode switches. Return true if we did work. */ +@@ -567,6 +662,39 @@ optimize_mode_switching (void) + + auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
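A hypothetical confluence operator, for an entity whose modes happen to be encoded as bitmasks of required features (the names and the encoding are assumptions for illustration, not part of the patch):

#define N_MODES 4	/* a power of two: two feature bits */

static int
example_mode_confluence (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* The "superunion" mode contains every feature of both inputs.  */
  int superunion = mode1 | mode2;
  return superunion < N_MODES ? superunion : N_MODES;
}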
View file
_service:tar_scm:0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch
Added
@@ -0,0 +1,1117 @@ +From d568321f8894ed270bf0011892b86baa6d6b82bd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 5 Mar 2024 20:46:57 +0800 +Subject: [PATCH 142/188] LoongArch: testsuite: Rewrite {x,}vfcmp-{d,f}.c to + avoid named registers + +Loops on named vector register are not vectorized (see comment 11 of +PR113622), so these test cases have been failing for a while. +Rewrite them using check-function-bodies to remove hard-coded register +names. A barrier is needed to always load the first operand before the +second operand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfcmp-f.c: Rewrite to avoid named + registers. + * gcc.target/loongarch/vfcmp-d.c: Likewise. + * gcc.target/loongarch/xvfcmp-f.c: Likewise. + * gcc.target/loongarch/xvfcmp-d.c: Likewise. +--- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 347 ++++++++++++++---- + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 204 ++++++++-- + 4 files changed, 816 insertions(+), 139 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +index 8b870ef38..87e4ed19e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +@@ -1,28 +1,188 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + + #define F double + #define I long long + + #include "vfcmp-f.c" + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { scan-assembler
"compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.ceq.d (\$vr0-9+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cune.d (\$vr0-9+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.slt.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sle.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.slt.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sle.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sule.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sult.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sule.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sult.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.clt.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cle.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.clt.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cle.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** 
vfcmp.cule.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0
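The barrier mentioned in the commit message is just an empty asm; a minimal sketch of the test idiom (assumed shape, simplified from the real files, which store the result through a pointer):

typedef double vec2 __attribute__ ((vector_size (16)));
typedef long long ivec2 __attribute__ ((vector_size (16)));

ivec2
compare_quiet_less (vec2 *a, vec2 *b)
{
  vec2 x = *a;
  __asm__ ("" ::: "memory");	/* keep this load ahead of the next one,
				   so \1 and \2 capture stable registers */
  vec2 y = *b;
  return (ivec2) (x < y);
}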
View file
_service:tar_scm:0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
Added
@@ -0,0 +1,483 @@ +From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:29:00 +0000 +Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84 + +This patch adds a way for targets to ask that selected mode changes +be brought forward, through a combination of: + +(1) requiring a mode in blocks where the entity was previously + transparent + +(2) pushing the transition at the head of a block onto incoming edges + +SME has two uses for this: + +- A "one-shot" entity that, for any given path of execution, + either stays off or makes exactly one transition from off to on. + This relies only on (1) above; see the hook description for more info. + + The main purpose of using mode-switching for this entity is to + shrink-wrap the code that requires it. + +- A second entity for which all transitions must be from known + modes, which is enforced using a combination of (1) and (2). + More specifically, (1) looks for edges B1->B2 for which: + + - B2 requires a specific mode and + - B1 does not guarantee a specific starting mode + + In this system, such an edge is only possible if the entity is + transparent in B1. (1) then forces B1 to require some safe common + mode. Applying this inductively means that all incoming edges are + from known modes. If different edges give different starting modes, + (2) pushes the transitions onto the edges themselves; this only + happens if the entity is not transparent in some predecessor block. + +The patch also uses the back-propagation as an excuse to do a simple +on-the-fly optimisation. + +Hopefully the comments in the patch explain things a bit better. + +gcc/ + * target.def (mode_switching.backprop): New hook. + * doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook. + * doc/tm.texi: Regenerate. + * mode-switching.cc (struct bb_info): Add single_succ. + (confluence_info): Add transp field. + (single_succ_confluence_n, single_succ_transfer): New functions. + (backprop_confluence_n, backprop_transfer): Likewise. + (optimize_mode_switching): Use them. Push mode transitions onto + a block's incoming edges, if the backprop hook requires it. +--- + gcc/doc/tm.texi | 28 +++++ + gcc/doc/tm.texi.in | 2 + + gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++ + gcc/target.def | 29 +++++ + 4 files changed, 334 insertions(+) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d7053ec9e..5f0972356 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists + for the given arguments. + @end deftypefn + ++@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2}) ++If defined, the mode-switching pass uses this hook to back-propagate mode ++requirements through blocks that have no mode requirements of their own. ++Specifically, @var{mode1} is the mode that @var{entity} has on exit ++from a block B1 (say) and @var{mode2} is the mode that the next block ++requires @var{entity} to have. B1 does not have any mode requirements ++of its own. ++ ++The hook should return the mode that it prefers or requires @var{entity} ++to have in B1, or the number of modes if there is no such requirement.
++If the hook returns a required mode for more than one of B1's outgoing ++edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}. ++ ++For example, suppose there is a ``one-shot'' entity that, ++for a given execution of a function, either stays off or makes exactly ++one transition from off to on. It is safe to make the transition at any ++time, but it is better not to do so unnecessarily. This hook allows the ++function to manage such an entity without having to track its state at ++runtime. Specifically, the entity would have two modes, 0 for off and ++1 for on, with 2 representing ``don't know''. The system is forbidden from ++transitioning from 2 to 1, since 2 represents the possibility that the ++entity is already on (and the aim is to avoid having to emit code to ++check for that case). This hook would therefore return 1 when @var{mode1} ++is 2 and @var{mode2} is 1, which would force the entity to be on in the ++source block. Applying this inductively would remove all transitions ++in which the previous state is unknown. ++@end deftypefn ++ + @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) + If this hook is defined, it is evaluated for every @var{entity} that + needs mode switching. It should return the mode that @var{entity} is +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index d420e62fd..fcab21744 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context. + + @hook TARGET_MODE_CONFLUENCE + ++@hook TARGET_MODE_BACKPROP ++ + @hook TARGET_MODE_ENTRY + + @hook TARGET_MODE_EXIT +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 065767902..c2a0f0294 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -81,6 +81,7 @@ struct bb_info + int computing; + int mode_out; + int mode_in; ++ int single_succ; + }; + + /* Clear mode I from entity J in bitmap B. */ +@@ -508,6 +509,9 @@ struct + /* Information about each basic block, indexed by block id. */ + struct bb_info *bb_info; + ++ /* A bitmap of blocks for which the current entity is transparent. */ ++ sbitmap transp; ++ + /* The entity that we're processing. */ + int entity; + +@@ -579,6 +583,210 @@ forward_transfer (int bb_index) + return true; + } + ++/* A backwards confluence function. Update the bb_info single_succ ++ field for E's source block, based on changes to E's destination block. ++ At the end of the dataflow problem, single_succ is the single mode ++ that all successors require (directly or indirectly), or no_mode ++ if there are conflicting requirements. ++ ++ Initially, a value of no_mode + 1 means "don't know". */ ++ ++static bool ++single_succ_confluence_n (edge e) ++{ ++ /* The entry block has no associated mode information. */ ++ if (e->src->index == ENTRY_BLOCK) ++ return false; ++ ++ /* We don't control mode changes across abnormal edges. */ ++ if (e->flags & EDGE_ABNORMAL) ++ return false; ++ ++ /* Do nothing if we've already found a conflict. */ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ int no_mode = confluence_info.no_mode; ++ int src_mode = bb_info[e->src->index].single_succ; ++ if (src_mode == no_mode) ++ return false; ++ ++ /* Work out what mode the destination block (or its successors) require.
*/ ++ int dest_mode; ++ if (e->dest->index == EXIT_BLOCK) ++ dest_mode = no_mode; ++ else if (bitmap_bit_p (confluence_info.transp, e->dest->index)) ++ dest_mode = bb_info[e->dest->index].single_succ; ++ else ++ dest_mode = bb_info[e->dest->index].seginfo->mode; ++ ++ /* Do nothing if the destination block has no new information. */ ++ if (dest_mode == no_mode + 1 || dest_mode == src_mode) ++ return false; ++ ++ /* Detect conflicting modes. */ ++ if (src_mode != no_mode + 1) ++ dest_mode = no_mode; ++ ++ bb_info[e->src->index].single_succ = dest_mode; ++ return true; ++} ++ ++/* A backward transfer function for computing the bb_info single_succ ++ fields, as described above single_succ_confluence. */ ++ ++static bool ++single_succ_transfer (int bb_index) ++{ ++ /* We don't have any field to transfer to. Assume that, after the ++ first iteration, we are only called if single_succ has changed. ++ We should then process incoming edges if the entity is transparent. */ ++ return bitmap_bit_p (confluence_info.transp, bb_index); ++} ++ ++/* Check whether the target wants to back-propagate a mode change across ++ edge E, and update the source block's computed mode if so. Return true
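The tm.texi example above translates almost line for line into a hook body (hypothetical target code; mode numbering as in the text: 0 off, 1 on, 2 unknown, 3 = number of modes):

static int
example_mode_backprop (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* Transitions from "unknown" to "on" are forbidden, so require the
     source block to already be in mode 1.  */
  if (mode1 == 2 && mode2 == 1)
    return 1;
  return 3;	/* number of modes: no requirement */
}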
View file
_service:tar_scm:0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch
Added
@@ -0,0 +1,80 @@ +From 415d38d84b2e363a2d512b54baac5532553f1402 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Wed, 6 Mar 2024 09:19:59 +0800 +Subject: [PATCH 143/188] LoongArch: Use /lib instead of /lib64 as the library + search path for MUSL. + +gcc/ChangeLog: + + * config.gcc: Add a case for loongarch*-*-linux-musl*. + * config/loongarch/linux.h: Disable the multilib-compatible + treatment for *musl* targets. + * config/loongarch/musl.h: New file. +--- + gcc/config.gcc | 3 +++ + gcc/config/loongarch/linux.h | 4 +++- + gcc/config/loongarch/musl.h | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/loongarch/musl.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 039187fa2..499b36b45 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2509,6 +2509,9 @@ riscv*-*-freebsd*) + + loongarch*-*-linux*) + tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" ++ case ${target} in ++ *-linux-musl*) tm_file="${tm_file} loongarch/musl.h" ++ esac + tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" +diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h +index 00039ac18..38aa4da2c 100644 +--- a/gcc/config/loongarch/linux.h ++++ b/gcc/config/loongarch/linux.h +@@ -21,7 +21,9 @@ along with GCC; see the file COPYING3. If not see + * This ensures that a compiler configured with --disable-multilib + * can work in a multilib environment. */ + +-#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH) ++#if !defined(LA_DEFAULT_TARGET_MUSL) \ ++ && defined(LA_DISABLE_MULTILIB) \ ++ && defined(LA_DISABLE_MULTIARCH) + + #if DEFAULT_ABI_BASE == ABI_BASE_LP64D + #define ABI_LIBDIR "lib64" +diff --git a/gcc/config/loongarch/musl.h b/gcc/config/loongarch/musl.h +new file mode 100644 +index 000000000..fa43bc866 +--- /dev/null ++++ b/gcc/config/loongarch/musl.h +@@ -0,0 +1,23 @@ ++/* Definitions for MUSL C library support. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#ifndef LA_DEFAULT_TARGET_MUSL ++#define LA_DEFAULT_TARGET_MUSL ++#endif +-- +2.43.0 +
View file
_service:tar_scm:0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
Added
@@ -0,0 +1,81 @@ +From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Fri, 1 Dec 2023 08:36:15 +0000 +Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper + function + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45 + +SME will add more intrinsics whose expansion code requires +the mode of the function return value. This patch adds an +associated helper routine. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_expander::result_mode): New member function. + * config/aarch64/aarch64-sve-builtins-base.cc + (svld234_impl::expand): Use it. + * config/aarch64/aarch64-sve-builtins.cc + (function_expander::get_reg_target): Likewise. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + gcc/config/aarch64/aarch64-sve-builtins.h | 9 +++++++++ + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index 56c9d75e7..c9bf13792 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1272,7 +1272,7 @@ public: + rtx + expand (function_expander &e) const OVERRIDE + { +- machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); ++ machine_mode tuple_mode = e.result_mode (); + insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index e168c8334..91af96687 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops, + rtx + function_expander::get_reg_target () + { +- machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); ++ machine_mode target_mode = result_mode (); + if (!possible_target || GET_MODE (possible_target) != target_mode) + possible_target = gen_reg_rtx (target_mode); + return possible_target; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 0d130b871..52994cde0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -528,6 +528,8 @@ public: + insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, + machine_mode = E_VOIDmode); + ++ machine_mode result_mode () const; ++ + bool overlaps_input_p (rtx); + + rtx convert_to_pmode (rtx); +@@ -877,6 +879,13 @@ function_base::call_properties (const function_instance &instance) const + return flags; + } + ++/* Return the mode of the result of a call. */ ++inline machine_mode ++function_expander::result_mode () const ++{ ++ return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); ++} ++ + } + + #endif +-- +2.33.0 +
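At call sites the cleanup reads as follows (a fragment that is only meaningful inside the SVE builtin framework shown above):

/* Within a function_expander member function:  */
/* Before: spell out the tree walk each time.  */
machine_mode mode_before = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
/* After: one self-describing helper call with the same value.  */
machine_mode mode_after = e.result_mode ();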
View file
_service:tar_scm:0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch
Added
@@ -0,0 +1,551 @@ +From 2170e0e811cb1b592f7577571f10b5ab95da9eaa Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 25 Oct 2024 06:05:59 +0000 +Subject: [PATCH 144/188] LoongArch: testsuite: Fix problems with incorrect + results in vector test cases. + +In simd_correctness_check.h, the role of the macro ASSERTEQ_64 is to check the +result of the passed vector values for the 64-bit data of each array element. +It turns out that it uses the abs() function to check only the lower 32 bits +of the data at a time, so this patch replaces abs() with the llabs() function. + +However, the following two problems may occur after modification: + +1. FAIL in lasx-xvfrint_s.c and lsx-vfrint_s.c +The reason for the error is that vector test cases that use __m{128,256} to +define vector types are composed of 32-bit primitive types, so they should use +ASSERTEQ_32 instead of ASSERTEQ_64 to check for correctness. + +2. FAIL in lasx-xvshuf_b.c and lsx-vshuf.c +The cause of the error is that the expected result of the function setting in +the test case is incorrect. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c: Replace + ASSERTEQ_64 with the macro ASSERTEQ_32. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Modify the expected + test results of some functions according to the function of the vector + instruction. + * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c: Same + modification as lasx-xvfrint_s.c. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Same + modification as lasx-xvshuf_b.c. + * gcc.target/loongarch/vector/simd_correctness_check.h: Use the llabs() + function instead of abs() to check the correctness of the results. +--- + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 58 +++++++++---------- + .../loongarch/vector/lsx/lsx-vfrint_s.c | 50 ++++++++-------- + .../loongarch/vector/simd_correctness_check.h | 2 +- + 3 files changed, 55 insertions(+), 55 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index fbfe300ea..4538528a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -184,7 +184,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -203,7 +203,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -241,7 +241,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32
(__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -260,7 +260,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -279,7 +279,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -298,7 +298,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -317,7 +317,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x55555555; + *((int *)&__m256_op0[6]) = 0x36aaaaac; +@@ -336,7 +336,7 @@ main () + *((int *)&__m256_result[1]) = 0x55555555; + *((int *)&__m256_result[0]) = 0x80000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -355,7 +355,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffc741; + *((int *)&__m256_op0[6]) = 0x8a023680; +@@ -374,7 +374,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -393,7 +393,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00200101; + *((int *)&__m256_op0[6]) = 0x01610000; +@@ -412,7 +412,7 @@ main () + *((int *)&__m256_result[1]) = 0x3f800000; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -431,7 +431,7 @@ main () + *((int *)&__m256_result[1]) = 0xfefefefe; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int
*)&__m256_op0[7]) = 0x1c1c1c1c; + *((int *)&__m256_op0[6]) = 0x1c1c1c1c; +@@ -450,7 +450,7 @@ main () + *((int *)&__m256_result[1]) = 0xfffffffe; + *((int *)&__m256_result[0]) = 0xffffff00; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -469,7 +469,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -488,7 +488,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -507,7 +507,7 @@ main ()
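For reference, a standalone sketch of the corrected 64-bit check (the real simd_correctness_check.h differs in details; llabs covers the full element where abs silently truncated to 32 bits):

#include <stdio.h>
#include <stdlib.h>

#define ASSERTEQ_64_SKETCH(line, ref, res)                               \
  do                                                                     \
    {                                                                    \
      long long *r_ = (long long *) &(ref);                              \
      long long *o_ = (long long *) &(res);                              \
      for (unsigned i_ = 0; i_ < sizeof (res) / 8; i_++)                 \
        if (llabs (r_[i_] - o_[i_]) > 0)                                 \
          printf ("error at line %d, element %u\n", (int) (line), i_);   \
    }                                                                    \
  while (0)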
_service:tar_scm:0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
Added
@@ -0,0 +1,232 @@ +From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001 +From: Kewen Lin <linkw@linux.ibm.com> +Date: Tue, 15 Nov 2022 20:26:07 -0600 +Subject: PATCH 046/157 BackportSME rtl: Try to remove EH edges after + {pro,epi}logue generation PR90259 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858 + +After prologue and epilogue generation, the judgement on whether +one memory access onto stack frame may trap or not could change, +since we get more exact stack information by now. + +As PR90259 shows, some memory access becomes impossible to trap +any more after prologue and epilogue generation, it can make +subsequent optimization be able to remove it if safe, but it +results in unexpected control flow status due to REG_EH_REGION +note missing. + +This patch proposes to try to remove EH edges with function +purge_all_dead_edges after prologue and epilogue generation, +it simplifies CFG as early as we can and don't need any fixup +in downstream passes. + +CFG simplification result with PR90259's case as example: + +*before* + + 18: %1:TF=call `__gcc_qdiv' argc:0 + REG_EH_REGION 0x2 + 77: NOTE_INSN_BASIC_BLOCK 3 + 19: NOTE_INSN_DELETED + 20: NOTE_INSN_DELETED + 110: %31:SI+0x20=%1:DF + REG_EH_REGION 0x2 + 116: NOTE_INSN_BASIC_BLOCK 4 + 111: %31:SI+0x28=%2:DF + REG_EH_REGION 0x2 + 22: NOTE_INSN_BASIC_BLOCK 5 + 108: %0:DF=%31:SI+0x20 + REG_EH_REGION 0x2 + 117: NOTE_INSN_BASIC_BLOCK 6 + 109: %1:DF=%31:SI+0x28 + REG_EH_REGION 0x2 + 79: NOTE_INSN_BASIC_BLOCK 7 + 26: %31:SI+0x18=%0:DF + 104: pc=L69 + 105: barrier + +*after* + + 18: %1:TF=call `__gcc_qdiv' argc:0 + REG_EH_REGION 0x2 + 77: NOTE_INSN_BASIC_BLOCK 3 + 19: NOTE_INSN_DELETED + 20: NOTE_INSN_DELETED + 110: %31:SI+0x20=%1:DF + 111: %31:SI+0x28=%2:DF + 108: %0:DF=%31:SI+0x20 + 109: %1:DF=%31:SI+0x28 + 26: %31:SI+0x18=%0:DF + 104: pc=L69 + 105: barrier + + PR rtl-optimization/90259 + +gcc/ChangeLog: + + * function.cc (rest_of_handle_thread_prologue_and_epilogue): Add + parameter fun, and call function purge_all_dead_edges. + (pass_thread_prologue_and_epilogue::execute): Name unamed parameter + as fun, and use it for rest_of_handle_thread_prologue_and_epilogue. + +gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr90259.C: New. +--- + gcc/function.cc | 13 ++- + gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++ + 2 files changed, 113 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C + +diff --git a/gcc/function.cc b/gcc/function.cc +index 49c7ccf4b..28de39dd6 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt) + } + + static unsigned int +-rest_of_handle_thread_prologue_and_epilogue (void) ++rest_of_handle_thread_prologue_and_epilogue (function *fun) + { + /* prepare_shrink_wrap is sensitive to the block structure of the control + flow graph, so clean it up first. */ +@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void) + Fix that up. */ + fixup_partitions (); + ++ /* After prologue and epilogue generation, the judgement on whether ++ one memory access onto stack frame may trap or not could change, ++ since we get more exact stack information by now. So try to ++ remove any EH edges here, see PR90259. */ ++ if (fun->can_throw_non_call_exceptions) ++ purge_all_dead_edges (); ++ + /* Shrink-wrapping can result in unreachable edges in the epilogue, + see PR57320. */ + cleanup_cfg (optimize ? 
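As an illustration (ours, not part of the patch; the PR90259 testcase itself is in the diff above), the class of code affected looks like this in C:

  /* Compile with -O2 -fnon-call-exceptions.  The division may trap and
     throw, so it initially ends its basic block with an EH edge; spills
     and reloads of the result are stack-frame accesses that also look
     trapping until the prologue/epilogue pass has fixed the frame
     layout, after which the new purge_all_dead_edges call can drop the
     stale edges.  */
  extern long double num, den;

  long double
  quotient (void)
  {
    return num / den;
  }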
CLEANUP_EXPENSIVE : 0); +@@ -6614,9 +6621,9 @@ public: + {} + + /* opt_pass methods: */ +- virtual unsigned int execute (function *) ++ unsigned int execute (function * fun) final override + { +- return rest_of_handle_thread_prologue_and_epilogue (); ++ return rest_of_handle_thread_prologue_and_epilogue (fun); + } + + }; // class pass_thread_prologue_and_epilogue +diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C +new file mode 100644 +index 000000000..db75ac7fe +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr90259.C +@@ -0,0 +1,103 @@ ++/* { dg-require-effective-target long_double_ibm128 } */ ++/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */ ++/* { dg-add-options long_double_ibm128 } */ ++ ++/* Verify there is no ICE. */ ++ ++template <int a> struct b ++{ ++ static constexpr int c = a; ++}; ++template <bool a> using d = b<a>; ++struct e ++{ ++ int f; ++ int ++ g () ++ { ++ return __builtin_ceil (f / (long double) h); ++ } ++ float h; ++}; ++template <typename, typename> using k = d<!bool ()>; ++template <typename> class n ++{ ++public: ++ e ae; ++ void af (); ++}; ++template <typename l> ++void ++n<l>::af () ++{ ++ ae.g (); ++} ++template <bool> using m = int; ++template <typename ag, typename ah, typename ai = m<k<ag, ah>::c>> ++using aj = n<ai>; ++struct o ++{ ++ void ++ af () ++ { ++ al.af (); ++ } ++ aj<int, int> al; ++}; ++template <typename> class am; ++template <typename i> class ao ++{ ++protected: ++ static i *ap (int); ++}; ++template <typename, typename> class p; ++template <typename ar, typename i, typename... j> class p<ar (j...), i> : ao<i> ++{ ++public: ++ static ar ++ as (const int &p1, j...) ++ { ++ (*ao<i>::ap (p1)) (j ()...); ++ } ++}; ++template <typename ar, typename... j> class am<ar (j...)> ++{ ++ template <typename, typename> using av = int; ++ ++public: ++ template <typename i, typename = av<d<!bool ()>, void>, ++ typename = av<i, void>> ++ am (i); ++ using aw = ar (*) (const int &, j...); ++ aw ax; ++};
_service:tar_scm:0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch
Added
@@ -0,0 +1,130 @@
+From 44a9ae67e19c0d744bd744cb0e9ae9e0069e40f1 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 5 Mar 2024 14:43:04 +0800
+Subject: [PATCH 145/188] LoongArch: Fixed an issue with the implementation of
+ the template atomic_compare_and_swapsi.
+
+If the hardware does not support LAMCAS, atomic_compare_and_swapsi needs to be
+implemented through "ll.w+sc.w". In the implementation of the instruction
+sequence, it is necessary to determine whether the two registers are equal.
+Since LoongArch's comparison instructions do not distinguish between 32-bit
+and 64-bit, the two operand registers being compared must be sign-extended.
+One of them is loaded from memory by the "ll.w" instruction, which guarantees
+that it is sign-extended. However, the other operand register is not
+guaranteed to hold a sign-extended value.
+
+gcc/ChangeLog:
+
+    * config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+    On loongarch64, a sign-extension operation is added when
+    operands[2] is a register operand and the mode is SImode.
+
+gcc/testsuite/ChangeLog:
+
+    * g++.target/loongarch/atomic-cas-int.C: New test.
+---
+ gcc/config/loongarch/sync.md                  | 46 ++++++++++++++-----
+ .../g++.target/loongarch/atomic-cas-int.C     | 32 +++++++++++++
+ 2 files changed, 67 insertions(+), 11 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 5da5c2780..2e008c487 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -245,18 +245,42 @@
+    (clobber (match_scratch:GPR 5 "=&r"))]
+   ""
+ {
+-  return "1:\\n\\t"
+-         "ll.<amo>\\t%0,%1\\n\\t"
+-         "bne\\t%0,%z2,2f\\n\\t"
+-         "or%i3\\t%5,$zero,%3\\n\\t"
+-         "sc.<amo>\\t%5,%1\\n\\t"
+-         "beqz\\t%5,1b\\n\\t"
+-         "b\\t3f\\n\\t"
+-         "2:\\n\\t"
+-         "%G4\\n\\t"
+-         "3:\\n\\t";
++  output_asm_insn ("1:", operands);
++  output_asm_insn ("ll.<amo>\t%0,%1", operands);
++
++  /* Like the test case atomic-cas-int.C, on loongarch64 at -O1 and higher,
++     the return value of val_without_const_folding will not be truncated
++     and will be passed directly to the function compare_exchange_strong.
++     However, the instruction 'bne' does not distinguish between 32-bit and
++     64-bit operations, so if the upper 32 bits of the register are not
++     sign-extended from bit 31, then the comparison may not be valid here.
++     This will affect the result of the operation.  */
++
++  if (TARGET_64BIT && REG_P (operands[2])
++      && GET_MODE (operands[2]) == SImode)
++    {
++      output_asm_insn ("addi.w\t%5,%2,0", operands);
++      output_asm_insn ("bne\t%0,%5,2f", operands);
++    }
++  else
++    output_asm_insn ("bne\t%0,%z2,2f", operands);
++
++  output_asm_insn ("or%i3\t%5,$zero,%3", operands);
++  output_asm_insn ("sc.<amo>\t%5,%1", operands);
++  output_asm_insn ("beqz\t%5,1b", operands);
++  output_asm_insn ("b\t3f", operands);
++  output_asm_insn ("2:", operands);
++  output_asm_insn ("%G4", operands);
++  output_asm_insn ("3:", operands);
++
++  return "";
+ }
+-  [(set (attr "length") (const_int 28))])
++  [(set (attr "length")
++        (if_then_else
++          (and (match_test "GET_MODE (operands[2]) == SImode")
++               (match_test "REG_P (operands[2])"))
++          (const_int 32)
++          (const_int 28)))])
+
+ (define_insn "atomic_cas_value_strong<mode>_amcas"
+   [(set (match_operand:QHWD 0 "register_operand" "=&r")
+diff --git a/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+new file mode 100644
+index 000000000..830ce4826
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+@@ -0,0 +1,32 @@
++/* { dg-do run } */
++/* { dg-options "-O2" } */
++
++#include <atomic>
++#include <cstdio>
++
++__attribute__ ((noinline)) long
++val_without_const_folding (long val)
++{
++  return val;
++}
++
++int
++main ()
++{
++  int oldval = 0xaa;
++  int newval = 0xbb;
++  std::atomic<int> amo;
++
++  amo.store (oldval);
++
++  long longval = val_without_const_folding (0xff80000000000000 + oldval);
++  oldval = static_cast<int> (longval);
++
++  amo.compare_exchange_strong (oldval, newval);
++
++  if (newval != amo.load (std::memory_order_relaxed))
++    __builtin_abort ();
++
++  return 0;
++}
++
+--
+2.43.0
+
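The comparison bug this fixes can be modeled in plain C (our sketch, not code from the patch):

  #include <stdint.h>

  /* 'loaded' models the ll.w result, which is always sign-extended;
     'expected' models operand 2, which may carry garbage in bits 63-32.
     The (int32_t) cast plays the role of the addi.w %5,%2,0 added above:
     without it, a raw 64-bit bne-style comparison of 0xaa against
     0xff800000000000aa would wrongly fail even though the low 32 bits
     match, exactly as in the test case.  */
  static inline int
  cas_compare_matches (int64_t loaded, int64_t expected)
  {
    return loaded == (int64_t) (int32_t) expected;
  }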
_service:tar_scm:0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
Added
@@ -0,0 +1,71 @@
+From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001
+From: Andrew Pinski <apinski@marvell.com>
+Date: Thu, 17 Nov 2022 22:03:08 +0000
+Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after
+ redeclaration error
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e
+
+The problem here is that after we create a call expression
+in the C front-end, we replace the decl's type with
+an error mark node. We then end up calling
+aggregate_value_p on that call expression, whose decl
+now has the error mark as its type, and we ICE.
+
+The fix is to check the function type inside
+aggregate_value_p, after it has been extracted from
+the call expression.
+
+OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
+
+Thanks,
+Andrew Pinski
+
+gcc/ChangeLog:
+
+    PR middle-end/107705
+    * function.cc (aggregate_value_p): Return 0 if
+    the function type was an error operand.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.dg/redecl-22.c: New test.
+---
+ gcc/function.cc                  | 3 +++
+ gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++
+ 2 files changed, 12 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c
+
+diff --git a/gcc/function.cc b/gcc/function.cc
+index 28de39dd6..99aa738eb 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
+   if (VOID_TYPE_P (type))
+     return 0;
+
++  if (error_operand_p (fntype))
++    return 0;
++
+   /* If a record should be passed the same as its first (and only) member
+      don't pass it as an aggregate.  */
+   if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type))
+diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c
+new file mode 100644
+index 000000000..7758570fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-22.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 107705 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++int f (void)
++{
++  int (*p) (void) = 0; // { dg-note "" }
++  return p ();
++  int p = 1; // { dg-error "" }
++}
+--
+2.33.0
+
_service:tar_scm:0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch
Added
@@ -0,0 +1,30 @@
+From eab751e71d4f4d5e9b2eda55d793fd57541fbc56 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 7 Mar 2024 09:44:03 +0800
+Subject: [PATCH 146/188] LoongArch: testsuite: Add compilation options to the
+ regname-fp-s9.c.
+
+When the value of the macro DEFAULT_CFLAGS is set to '-ansi -pedantic-errors',
+regname-fp-s9.c fails. To solve this problem, add the compilation options
+'-Wno-pedantic -std=gnu90' to this test case.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/regname-fp-s9.c: Add compilation options
+    '-Wno-pedantic -std=gnu90'.
+---
+ gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+index d2e3b80f8..77a74f1f6 100644
+--- a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+@@ -1,3 +1,4 @@
+ /* { dg-do compile } */
++/* { dg-additional-options "-Wno-pedantic -std=gnu90" } */
+ register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */
+ register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */
+--
+2.43.0
+
_service:tar_scm:0147-Backport-SME-function-Change-return-type-of-predicat.patch
Added
@@ -0,0 +1,351 @@ +From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001 +From: Uros Bizjak <ubizjak@gmail.com> +Date: Wed, 21 Jun 2023 21:55:30 +0200 +Subject: PATCH 048/157 BackportSME function: Change return type of + predicate function from int to bool + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086 + +Also change some internal variables to bool and some functions to void. + +gcc/ChangeLog: + + * function.h (emit_initial_value_sets): + Change return type from int to void. + (aggregate_value_p): Change return type from int to bool. + (prologue_contains): Ditto. + (epilogue_contains): Ditto. + (prologue_epilogue_contains): Ditto. + * function.cc (temp_slot): Make "in_use" variable bool. + (make_slot_available): Update for changed "in_use" variable. + (assign_stack_temp_for_type): Ditto. + (emit_initial_value_sets): Change return type from int to void + and update function body accordingly. + (instantiate_virtual_regs): Ditto. + (rest_of_handle_thread_prologue_and_epilogue): Ditto. + (safe_insn_predicate): Change return type from int to bool. + (aggregate_value_p): Change return type from int to bool + and update function body accordingly. + (prologue_contains): Change return type from int to bool. + (prologue_epilogue_contains): Ditto. +--- + gcc/function.cc | 77 ++++++++++++++++++++++++------------------------- + gcc/function.h | 10 +++---- + 2 files changed, 42 insertions(+), 45 deletions(-) + +diff --git a/gcc/function.cc b/gcc/function.cc +index 99aa738eb..fc8eb5812 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -578,8 +578,8 @@ public: + tree type; + /* The alignment (in bits) of the slot. */ + unsigned int align; +- /* Nonzero if this temporary is currently in use. */ +- char in_use; ++ /* True if this temporary is currently in use. */ ++ bool in_use; + /* Nesting level at which this slot is being used. 
*/ + int level; + /* The offset of the slot from the frame_pointer, including extra space +@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp) + { + cut_slot_from_list (temp, temp_slots_at_level (temp->level)); + insert_slot_to_list (temp, &avail_temp_slots); +- temp->in_use = 0; ++ temp->in_use = false; + temp->level = -1; + n_temp_slots_in_use--; + } +@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + if (known_ge (best_p->size - rounded_size, alignment)) + { + p = ggc_alloc<temp_slot> (); +- p->in_use = 0; ++ p->in_use = false; + p->size = best_p->size - rounded_size; + p->base_offset = best_p->base_offset + rounded_size; + p->full_size = best_p->full_size - rounded_size; +@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + } + + p = selected; +- p->in_use = 1; ++ p->in_use = true; + p->type = type; + p->level = temp_slot_level; + n_temp_slots_in_use++; +@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno) + return NULL_RTX; + } + +-unsigned int ++void + emit_initial_value_sets (void) + { + struct initial_value_struct *ivs = crtl->hard_reg_initial_vals; +@@ -1348,7 +1348,7 @@ emit_initial_value_sets (void) + rtx_insn *seq; + + if (ivs == 0) +- return 0; ++ return; + + start_sequence (); + for (i = 0; i < ivs->num_entries; i++) +@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void) + end_sequence (); + + emit_insn_at_entry (seq); +- return 0; + } + + /* Return the hardreg-pseudoreg initial values pair entry I and +@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc) + /* A subroutine of instantiate_virtual_regs_in_insn. Return true if X + matches the predicate for insn CODE operand OPERAND. */ + +-static int ++static bool + safe_insn_predicate (int code, int operand, rtx x) + { + return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x); +@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl) + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +-static unsigned int ++static void + instantiate_virtual_regs (void) + { + rtx_insn *insn; +@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void) + /* Indicate that, from now on, assign_stack_local should use + frame_pointer_rtx. */ + virtuals_instantiated = 1; +- +- return 0; + } + + namespace { +@@ -2031,7 +2028,8 @@ public: + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { +- return instantiate_virtual_regs (); ++ instantiate_virtual_regs (); ++ return 0; + } + + }; // class pass_instantiate_virtual_regs +@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt) + } + +  +-/* Return 1 if EXP is an aggregate type (or a value with aggregate type). ++/* Return true if EXP is an aggregate type (or a value with aggregate type). + This means a type for which function calls must pass an address to the + function or get an address back from the function. + EXP may be a type node or an expression (whose type is tested). */ + +-int ++bool + aggregate_value_p (const_tree exp, const_tree fntype) + { + const_tree type = (TYPE_P (exp)) ? exp : TREE_TYPE (exp); +@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) + else + /* For internal functions, assume nothing needs to be + returned in memory. 
*/ +- return 0; ++ return false; + } + break; + case FUNCTION_DECL: +@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype) + } + + if (VOID_TYPE_P (type)) +- return 0; ++ return false; + + if (error_operand_p (fntype)) +- return 0; ++ return false; + + /* If a record should be passed the same as its first (and only) member + don't pass it as an aggregate. */ +@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype) + reference, do so. */ + if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL) + && DECL_BY_REFERENCE (exp)) +- return 1; ++ return true; + + /* Function types that are TREE_ADDRESSABLE force return in memory. */ + if (fntype && TREE_ADDRESSABLE (fntype)) +- return 1; ++ return true; + + /* Types that are TREE_ADDRESSABLE must be constructed in memory, + and thus can't be returned in registers. */ + if (TREE_ADDRESSABLE (type)) +- return 1; ++ return true; + + if (TYPE_EMPTY_P (type)) +- return 0; ++ return false; +
_service:tar_scm:0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch
Added
@@ -0,0 +1,137 @@
+From 465f0653b6e7bf5adb5d1f6c9e8aff2b81a3f27f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 26 Jan 2024 18:28:32 +0800
+Subject: [PATCH 147/188] LoongArch: Emit R_LARCH_RELAX for TLS IE with
+ non-extreme code model to allow the IE to LE linker relaxation
+
+In Binutils we need to make IE to LE relaxation only allowed when there
+is an R_LARCH_RELAX after R_LARCH_TLS_IE_PC_{HI20,LO12} so an invalid
+"partial" relaxation won't happen with the extreme code model. So if we
+are emitting %ie_pc_{hi20,lo12} in a non-extreme code model, emit an
+R_LARCH_RELAX to allow the relaxation. The IE to LE relaxation does not
+require the pcalau12i and the ld instruction to be adjacent, so we don't
+need to limit ourselves to using the macro.
+
+For the distro maintainers backporting changes: this change depends on
+r14-8721; without r14-8721, R_LARCH_RELAX can be emitted mistakenly in
+the extreme code model.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+    Support 'Q' for R_LARCH_RELAX for TLS IE.
+    (loongarch_output_move): Use 'Q' to print R_LARCH_RELAX for TLS
+    IE.
+    * config/loongarch/loongarch.md (ld_from_got<mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/tls-ie-relax.c: New test.
+    * gcc.target/loongarch/tls-ie-norelax.c: New test.
+    * gcc.target/loongarch/tls-ie-extreme.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc                 | 15 ++++++++++++++-
+ gcc/config/loongarch/loongarch.md                 |  2 +-
+ .../gcc.target/loongarch/tls-ie-extreme.c         |  5 +++++
+ .../gcc.target/loongarch/tls-ie-norelax.c         |  5 +++++
+ gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c | 11 +++++++++++
+ 5 files changed, 36 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index d23b09cc5..c1dc30b61 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4977,7 +4977,7 @@ loongarch_output_move (rtx dest, rtx src)
+       if (type == SYMBOL_TLS_LE)
+         return "lu12i.w\t%0,%h1";
+       else
+-        return "pcalau12i\t%0,%h1";
++        return "%Q1pcalau12i\t%0,%h1";
+     }
+
+   if (src_code == CONST_INT)
+@@ -6141,6 +6141,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+    'L'  Print the low-part relocation associated with OP.
+    'm'  Print one less than CONST_INT OP in decimal.
+    'N'  Print the inverse of the integer branch condition for comparison OP.
++   'Q'  Print R_LARCH_RELAX for TLS IE.
+    'r'  Print address 12-31bit relocation associated with OP.
+    'R'  Print address 32-51bit relocation associated with OP.
+    'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+@@ -6278,6 +6279,18 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+                         letter);
+       break;
+
++    case 'Q':
++      if (!TARGET_LINKER_RELAXATION)
++        break;
++
++      if (code == HIGH)
++        op = XEXP (op, 0);
++
++      if (loongarch_classify_symbolic_expression (op) == SYMBOL_TLS_IE)
++        fprintf (file, ".reloc\t.,R_LARCH_RELAX\n\t");
++
++      break;
++
+    case 'r':
+      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+                                     true /* lo_reloc */);
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 248ad12bb..d2c7c3b05 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2620,7 +2620,7 @@
+            (match_operand:P 2 "symbolic_operand")))]
+          UNSPEC_LOAD_FROM_GOT))]
+   ""
+-  "ld.<d>\t%0,%1,%L2"
++  "%Q2ld.<d>\t%0,%1,%L2"
+   [(set_attr "type" "move")]
+ )
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+new file mode 100644
+index 000000000..00c545a3e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mcmodel=extreme -mexplicit-relocs=auto -mrelax" } */
++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */
++
++#include "tls-ie-relax.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+new file mode 100644
+index 000000000..dd6bf3634
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mno-relax" } */
++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */
++
++#include "tls-ie-relax.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+new file mode 100644
+index 000000000..e9f7569b1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mrelax" } */
++/* { dg-final { scan-assembler-times "R_LARCH_RELAX" 2 { target tls_native } } } */
++
++extern __thread int errno;
++
++void
++unimplemented (void)
++{
++  errno = -38;
++}
+--
+2.43.0
+
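For concreteness, the two relocations the tests count come out roughly as below (our sketch; the register choice and the final load are assumptions, not patch output):

  extern __thread int tls_var;   /* initial-exec TLS access */

  int
  read_tls (void)
  {
    /* Expected assembly under -mexplicit-relocs -mrelax (abridged):
         .reloc    .,R_LARCH_RELAX
         pcalau12i $r12,%ie_pc_hi20(tls_var)
         .reloc    .,R_LARCH_RELAX
         ld.d      $r12,$r12,%ie_pc_lo12(tls_var)
         ldx.w     $r4,$r12,$r2     # $r2 is the thread pointer
       which the linker may now relax to a direct LE sequence.  */
    return tls_var;
  }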
_service:tar_scm:0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
Added
@@ -0,0 +1,233 @@ +From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:28:46 +0000 +Subject: PATCH 049/157 BackportSME Allow prologues and epilogues to be + inserted later + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702 + +Arm's SME adds a new processor mode called streaming mode. +This mode enables some new (matrix-oriented) instructions and +disables several existing groups of instructions, such as most +Advanced SIMD vector instructions and a much smaller set of SVE +instructions. It can also change the current vector length. + +There are instructions to switch in and out of streaming mode. +However, their effect on the ISA and vector length can't be represented +directly in RTL, so they need to be emitted late in the pass pipeline, +close to md_reorg. + +It's sometimes the responsibility of the prologue and epilogue to +switch modes, which means we need to emit the prologue and epilogue +sequences late as well. (This loses shrink-wrapping and scheduling +opportunities, but that's a price worth paying.) + +This patch therefore adds a target hook for forcing prologue +and epilogue insertion to happen later in the pipeline. + +gcc/ + * target.def (use_late_prologue_epilogue): New hook. + * doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE. + * doc/tm.texi: Regenerate. + * passes.def (pass_late_thread_prologue_and_epilogue): New pass. + * tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare. + * function.cc (pass_thread_prologue_and_epilogue::gate): New function. + (pass_data_late_thread_prologue_and_epilogue): New pass variable. + (pass_late_thread_prologue_and_epilogue): New pass class. + (make_pass_late_thread_prologue_and_epilogue): New function. +--- + gcc/doc/tm.texi | 19 ++++++++++++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/passes.def | 3 +++ + gcc/target.def | 21 +++++++++++++++++++ + gcc/tree-pass.h | 2 ++ + 6 files changed, 97 insertions(+) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 5f0972356..d930d233d 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE () ++Return true if the current function's prologue and epilogue should ++be emitted late in the pass pipeline, instead of at the usual point. ++ ++Normally, the prologue and epilogue sequences are introduced soon after ++register allocation is complete. The advantage of this approach is that ++it allows the prologue and epilogue instructions to be optimized and ++scheduled with other code in the function. However, some targets ++require the prologue and epilogue to be the first and last sequences ++executed by the function, with no variation allowed. This hook should ++return true on such targets. ++ ++The default implementation returns false, which is correct for most ++targets. The hook should only return true if there is a specific ++target limitation that cannot be described in RTL. For example, ++the hook might return true if the prologue and epilogue need to switch ++between instruction sets. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. 
The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index fcab21744..19eabec48 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index fc8eb5812..7c90b5f23 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see + #include "function-abi.h" + #include "value-range.h" + #include "gimple-range.h" ++#include "insn-attr.h" + + /* So we can assign to cfun in this file. */ + #undef cfun +@@ -6620,6 +6621,11 @@ public: + {} + + /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return !targetm.use_late_prologue_epilogue (); ++ } ++ + unsigned int execute (function * fun) final override + { + rest_of_handle_thread_prologue_and_epilogue (fun); +@@ -6628,6 +6634,44 @@ public: + + }; // class pass_thread_prologue_and_epilogue + ++const pass_data pass_data_late_thread_prologue_and_epilogue = ++{ ++ RTL_PASS, /* type */ ++ "late_pro_and_epilogue", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */ ++}; ++ ++class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass ++{ ++public: ++ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return targetm.use_late_prologue_epilogue (); ++ } ++ ++ unsigned int execute (function *fn) final override ++ { ++ /* It's not currently possible to have both delay slots and ++ late prologue/epilogue, since the latter has to run before ++ the former, and the former won't honor whatever restrictions ++ the latter is trying to enforce. */ ++ gcc_assert (!DELAY_SLOTS); ++ rest_of_handle_thread_prologue_and_epilogue (fn); ++ return 0; ++ } ++}; // class pass_late_thread_prologue_and_epilogue ++ + } // anon namespace + + rtl_opt_pass * +@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt) + return new pass_thread_prologue_and_epilogue (ctxt); + } + ++rtl_opt_pass * ++make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++{ ++ return new pass_late_thread_prologue_and_epilogue (ctxt); ++} ++ + namespace { + + const pass_data pass_data_zero_call_used_regs = +diff --git a/gcc/passes.def b/gcc/passes.def +index cdc600298..8797f166f 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_stack_regs_run); + POP_INSERT_PASSES () + POP_INSERT_PASSES () ++ NEXT_PASS (pass_late_thread_prologue_and_epilogue); ++ /* No target-independent code motion is allowed beyond this point, ++ excepting the legacy delayed-branch pass. 
*/ + NEXT_PASS (pass_late_compilation); + PUSH_INSERT_PASSES_WITHIN (pass_late_compilation) + NEXT_PASS (pass_zero_call_used_regs); +diff --git a/gcc/target.def b/gcc/target.def +index 4d77c1523..fd4899612 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.", + machine_mode, (machine_mode m1, machine_mode m2), + default_cc_modes_compatible) + ++DEFHOOK ++(use_late_prologue_epilogue, ++ "Return true if the current function's prologue and epilogue should\n\ ++be emitted late in the pass pipeline, instead of at the usual point.\n\ ++\n\ ++Normally, the prologue and epilogue sequences are introduced soon after\n\
_service:tar_scm:0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch
Added
@@ -0,0 +1,57 @@
+From acc00029aab3cdd1186f1ed4a93db2205cdd166c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 13 Mar 2024 20:44:38 +0800
+Subject: [PATCH 148/188] LoongArch: Remove unused and incorrect
+ "sge<u>_<X:mode><GPR:mode>" define_insn
+
+If this insn is really used, we'll have something like
+
+  slti $r4,$r0,$r5
+
+in the code. The assembler will reject it because slti wants 2
+register operands and 1 immediate operand. But we've not got any bug
+report for this, indicating this define_insn is not used at all.
+
+Note that do_store_flag (in expr.cc) is already converting x >= 1 to
+x > 0 unconditionally, so this define_insn is indeed unused and we can
+just remove it.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (any_ge): Remove.
+    (sge<u>_<X:mode><GPR:mode>): Remove.
+---
+ gcc/config/loongarch/loongarch.md | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index d2c7c3b05..1b3525dde 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -517,7 +517,6 @@
+ ;; These code iterators allow the signed and unsigned scc operations to use
+ ;; the same template.
+ (define_code_iterator any_gt [gt gtu])
+-(define_code_iterator any_ge [ge geu])
+ (define_code_iterator any_lt [lt ltu])
+ (define_code_iterator any_le [le leu])
+
+@@ -3355,15 +3354,6 @@
+   [(set_attr "type" "slt")
+    (set_attr "mode" "<X:MODE>")])
+
+-(define_insn "*sge<u>_<X:mode><GPR:mode>"
+-  [(set (match_operand:GPR 0 "register_operand" "=r")
+-        (any_ge:GPR (match_operand:X 1 "register_operand" "r")
+-                    (const_int 1)))]
+-  ""
+-  "slt<u>i\t%0,%.,%1"
+-  [(set_attr "type" "slt")
+-   (set_attr "mode" "<X:MODE>")])
+-
+ (define_insn "*slt<u>_<X:mode><GPR:mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=r")
+        (any_lt:GPR (match_operand:X 1 "register_operand" "r")
+--
+2.43.0
+
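The do_store_flag canonicalization mentioned above is easy to see from the source side (our illustration, not part of the patch):

  /* Both functions reach the backend as 'x > 0', so the removed any_ge
     pattern matching (const_int 1) could never be selected.  */
  int ge_one  (long x) { return x >= 1; }
  int gt_zero (long x) { return x > 0; }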
_service:tar_scm:0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
Added
@@ -0,0 +1,239 @@ +From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:35:57 +0000 +Subject: PATCH 050/157 BackportSME Add a target hook for sibcall + epilogues + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a + +Epilogues for sibling calls are generated using the +sibcall_epilogue pattern. One disadvantage of this approach +is that the target doesn't know which call the epilogue is for, +even though the code that generates the pattern has the call +to hand. + +Although call instructions are currently rtxes, and so could be +passed as an operand to the pattern, the main point of introducing +rtx_insn was to move towards separating the rtx and insn types +(a good thing IMO). There also isn't an existing practice of +passing genuine instructions (as opposed to labels) to +instruction patterns. + +This patch therefore adds a hook that can be defined as an +alternative to sibcall_epilogue. The advantage is that it +can be passed the call; the disadvantage is that it can't +use .md conveniences like generating instructions from +textual patterns (although most epilogues are too complex +to benefit much from that anyway). + +gcc/ + * doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL. + * doc/tm.texi: Regenerate. + * target.def (emit_epilogue_for_sibcall): New hook. + * calls.cc (can_implement_as_sibling_call_p): Use it. + * function.cc (thread_prologue_and_epilogue_insns): Likewise. + (reposition_prologue_and_epilogue_notes): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take + an rtx_call_insn * rather than a bool. + * config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise. + (TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define. + * config/aarch64/aarch64.md (epilogue): Update call. + (sibcall_epilogue): Delete. 
+--- + gcc/calls.cc | 3 ++- + gcc/config/aarch64/aarch64-protos.h | 2 +- + gcc/config/aarch64/aarch64.cc | 11 +++++++---- + gcc/config/aarch64/aarch64.md | 11 +---------- + gcc/doc/tm.texi | 8 ++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 15 +++++++++++++-- + gcc/target.def | 9 +++++++++ + 8 files changed, 43 insertions(+), 18 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index 4d0bc45be..c1db66883 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp, + tree addr, + const args_size &args_size) + { +- if (!targetm.have_sibcall_epilogue ()) ++ if (!targetm.have_sibcall_epilogue () ++ && !targetm.emit_epilogue_for_sibcall) + { + maybe_complain_about_tail_call + (exp, +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 86e444a60..97984f3ab 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); + const char * aarch64_output_probe_stack_range (rtx, rtx); + const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); + void aarch64_err_no_fpadvsimd (machine_mode); +-void aarch64_expand_epilogue (bool); ++void aarch64_expand_epilogue (rtx_call_insn *); + rtx aarch64_ptrue_all (unsigned int); + opt_machine_mode aarch64_ptrue_all_mode (rtx); + rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index fd1114b52..055b436b1 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void) + from a deallocated stack, and we optimize the unwind records by + emitting them all together if possible. */ + void +-aarch64_expand_epilogue (bool for_sibcall) ++aarch64_expand_epilogue (rtx_call_insn *sibcall) + { + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall) + explicitly authenticate. + */ + if (aarch64_return_address_signing_enabled () +- && (for_sibcall || !TARGET_ARMV8_3)) ++ && (sibcall || !TARGET_ARMV8_3)) + { + switch (aarch64_ra_sign_key) + { +@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + /* Stack adjustment for exception handler. */ +- if (crtl->calls_eh_return && !for_sibcall) ++ if (crtl->calls_eh_return && !sibcall) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA +@@ -10223,7 +10223,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + emit_use (gen_rtx_REG (DImode, LR_REGNUM)); +- if (!for_sibcall) ++ if (!sibcall) + emit_jump_insn (ret_rtx); + } + +@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_HAVE_SHADOW_CALL_STACK + #define TARGET_HAVE_SHADOW_CALL_STACK true + ++#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-aarch64.h" +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7267a74d6..a78476c8a 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -871,16 +871,7 @@ + (clobber (const_int 0)) + "" + " +- aarch64_expand_epilogue (false); +- DONE; +- " +-) +- +-(define_expand "sibcall_epilogue" +- (clobber (const_int 0)) +- "" +- " +- aarch64_expand_epilogue (true); ++ aarch64_expand_epilogue (nullptr); + DONE; + " + ) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d930d233d..369f4b8da 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11703,6 +11703,14 @@ the hook might return true if the prologue and epilogue need to switch + between instruction sets. + @end deftypefn + ++@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call}) ++If defined, this hook emits an epilogue sequence for sibling (tail) ++call instruction @var{call}. Another way of providing epilogues ++for sibling calls is to define the @code{sibcall_epilogue} instruction ++pattern; the main advantage of this hook over the pattern is that it ++has access to the call instruction. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 19eabec48..748b0777a 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7710,6 +7710,8 @@ to by @var{ce_info}. + + @hook TARGET_USE_LATE_PROLOGUE_EPILOGUE + ++@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index 7c90b5f23..ddab43ca4 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void) + if (!(CALL_P (insn) && SIBLING_CALL_P (insn))) + continue; + +- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ()) ++ rtx_insn *ep_seq; ++ if (targetm.emit_epilogue_for_sibcall) ++ { ++ start_sequence (); ++ targetm.emit_epilogue_for_sibcall (as_a<rtx_call_insn *> (insn)); ++ ep_seq = get_insns (); ++ end_sequence ();
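A target adopting the hook replaces its sibcall_epilogue expander with a sketch like this (ours, modeled on the aarch64 change above):

  /* The epilogue emitter now sees the sibcall instruction itself; a null
     argument means a normal epilogue ending in a return.  */
  static void
  example_expand_epilogue (rtx_call_insn *sibcall)
  {
    /* ... deallocate the frame and restore saved registers ...  */
    if (!sibcall)
      emit_jump_insn (ret_rtx);
    /* For a sibcall, control falls through into the tail call.  */
  }

  #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
  #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL example_expand_epilogue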
_service:tar_scm:0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch
Added
@@ -0,0 +1,85 @@
+From 0dba1a1daef3f043235382f0e8f107313b9bde07 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Thu, 14 Mar 2024 09:26:54 +0800
+Subject: [PATCH 149/188] LoongArch: Remove masking process for operand 3 of
+ xvpermi.q.
+
+The behavior of non-zero unused bits in xvpermi.q instruction's
+third operand is undefined on LoongArch. According to our
+discussion (https://github.com/llvm/llvm-project/pull/83540),
+we think it is better to keep the insn operand unmodified.
+
+This patch partially reverts 7b158e036a95b1ab40793dd53bed7dbd770ffdaf.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>):
+    Remove masking of operand 3.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c:
+    Reposition operand 3's value into the instruction's defined
+    accepted range.
+---
+ gcc/config/loongarch/lasx.md                          | 5 -----
+ .../gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c | 6 +++---
+ 2 files changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 38f35bad6..f3b5ea373 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -640,8 +640,6 @@
+   (set_attr "mode" "<MODE>")])
+
+ ;; xvpermi.q
+-;; Unused bits in operands[3] need be set to 0 to avoid
+-;; causing undefined behavior on LA464.
+ (define_insn "lasx_xvpermi_q_<LASX:mode>"
+   [(set (match_operand:LASX 0 "register_operand" "=f")
+        (unspec:LASX
+          [(match_operand:LASX 1 "register_operand" "0")
+           (match_operand:LASX 2 "register_operand" "f")
+           (match_operand 3 "const_uimm8_operand")]
+          UNSPEC_LASX_XVPERMI_Q))]
+   "ISA_HAS_LASX"
+ {
+-  int mask = 0x33;
+-  mask &= INTVAL (operands[3]);
+-  operands[3] = GEN_INT (mask);
+   return "xvpermi.q\t%u0,%u2,%3";
+ }
+   [(set_attr "type" "simd_splat")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+index dbc29d2fb..f89dfc311 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+@@ -27,7 +27,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000;
+   *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d;
+   *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x22);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000;
+@@ -42,7 +42,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c;
+   *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000;
+   *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x31);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff;
+@@ -57,7 +57,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000;
+   *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff;
+   *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x02);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   return 0;
+--
+2.43.0
+
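A usage note (ours, not from the patch): with the in-compiler masking gone, callers are responsible for keeping the undefined selector bits clear, e.g.:

  #include <lasxintrin.h>

  /* Only imm[1:0] and imm[5:4] of the immediate select 128-bit lanes --
     the bits the old 0x33 mask kept.  The other bits are undefined on
     the hardware, which is why the test immediates above became 0x22,
     0x31 and 0x02.  */
  __m256i
  combine_lanes (__m256i xd, __m256i xj)
  {
    return __lasx_xvpermi_q (xd, xj, 0x31);
  }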
_service:tar_scm:0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
Added
@@ -0,0 +1,461 @@ +From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:44:52 +0000 +Subject: PATCH 051/157 BackportSME Add a new target hook: + TARGET_START_CALL_ARGS + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2 + +We have the following two hooks into the call expansion code: + +- TARGET_CALL_ARGS is called for each argument before arguments + are moved into hard registers. + +- TARGET_END_CALL_ARGS is called after the end of the call + sequence (specifically, after any return value has been + moved to a pseudo). + +This patch adds a TARGET_START_CALL_ARGS hook that is called before +the TARGET_CALL_ARGS sequence. This means that TARGET_START_CALL_REGS +and TARGET_END_CALL_REGS bracket the region in which argument registers +might be live. They also bracket a region in which the only call +emiitted by target-independent code is the call to the target function +itself. (For example, TARGET_START_CALL_ARGS happens after any use of +memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any +use of memcpy to copy the result.) + +Also, the patch adds the cumulative argument structure as an argument +to the hooks, so that the target can use it to record and retrieve +information about the call as a whole. + +The TARGET_CALL_ARGS docs said: + + While generating RTL for a function call, this target hook is invoked once + for each argument passed to the function, either a register returned by + ``TARGET_FUNCTION_ARG`` or a memory location. It is called just +- before the point where argument registers are stored. + +The last bit was true for normal calls, but for libcalls the hook was +invoked earlier, before stack arguments have been copied. I don't think +this caused a practical difference for nvptx (the only port to use the +hooks) since I wouldn't expect any libcalls to take stack parameters. + +gcc/ + * doc/tm.texi.in: Add TARGET_START_CALL_ARGS. + * doc/tm.texi: Regenerate. + * target.def (start_call_args): New hook. + (call_args, end_call_args): Add a parameter for the cumulative + argument information. + * hooks.h (hook_void_rtx_tree): Delete. + * hooks.cc (hook_void_rtx_tree): Likewise. + * targhooks.h (hook_void_CUMULATIVE_ARGS): Declare. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * targhooks.cc (hook_void_CUMULATIVE_ARGS): New function. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * calls.cc (expand_call): Call start_call_args before computing + and storing stack parameters. Pass the cumulative argument + information to call_args and end_call_args. + (emit_library_call_value_1): Likewise. + * config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative + argument parameter. + (nvptx_end_call_args): Likewise. +--- + gcc/calls.cc | 61 +++++++++++++++++++++------------------ + gcc/config/nvptx/nvptx.cc | 4 +-- + gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++------- + gcc/doc/tm.texi.in | 2 ++ + gcc/hooks.cc | 5 ---- + gcc/hooks.h | 1 - + gcc/target.def | 59 +++++++++++++++++++++++++++++-------- + gcc/targhooks.cc | 10 +++++++ + gcc/targhooks.h | 5 ++-- + 9 files changed, 140 insertions(+), 60 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index c1db66883..4a8535cc6 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore) + sibcall_failure = 1; + } + ++ /* Set up the next argument register. 
For sibling calls on machines ++ with register windows this should be the incoming register. */ ++ if (pass == 0) ++ next_arg_reg = targetm.calls.function_incoming_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ else ++ next_arg_reg = targetm.calls.function_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ ++ targetm.calls.start_call_args (args_so_far); ++ + bool any_regs = false; + for (i = 0; i < num_actuals; i++) + if (argsi.reg != NULL_RTX) + { + any_regs = true; +- targetm.calls.call_args (argsi.reg, funtype); ++ targetm.calls.call_args (args_so_far, argsi.reg, funtype); + } + if (!any_regs) +- targetm.calls.call_args (pc_rtx, funtype); ++ targetm.calls.call_args (args_so_far, pc_rtx, funtype); + + /* Figure out the register where the value, if any, will come back. */ + valreg = 0; +@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore) + later safely search backwards to find the CALL_INSN. */ + before_call = get_last_insn (); + +- /* Set up next argument register. For sibling calls on machines +- with register windows this should be the incoming register. */ +- if (pass == 0) +- next_arg_reg = targetm.calls.function_incoming_arg +- (args_so_far, function_arg_info::end_marker ()); +- else +- next_arg_reg = targetm.calls.function_arg +- (args_so_far, function_arg_info::end_marker ()); +- + if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) + { + int arg_nr = return_flags & ERF_RETURN_ARG_MASK; +@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore) + for (i = 0; i < num_actuals; ++i) + free (argsi.aligned_regs); + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + insns = get_insns (); + end_sequence (); +@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + } + #endif + +- /* When expanding a normal call, args are stored in push order, +- which is the reverse of what we have here. */ +- bool any_regs = false; +- for (int i = nargs; i-- > 0; ) +- if (argveci.reg != NULL_RTX) +- { +- targetm.calls.call_args (argveci.reg, NULL_TREE); +- any_regs = true; +- } +- if (!any_regs) +- targetm.calls.call_args (pc_rtx, NULL_TREE); ++ rtx call_cookie ++ = targetm.calls.function_arg (args_so_far, ++ function_arg_info::end_marker ()); + + /* Push the args that need to be pushed. */ + +@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + + fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); + ++ targetm.calls.start_call_args (args_so_far); ++ ++ /* When expanding a normal call, args are stored in push order, ++ which is the reverse of what we have here. */ ++ bool any_regs = false; ++ for (int i = nargs; i-- > 0; ) ++ if (argveci.reg != NULL_RTX) ++ { ++ targetm.calls.call_args (args_so_far, argveci.reg, NULL_TREE); ++ any_regs = true; ++ } ++ if (!any_regs) ++ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE); ++ + /* Now load any reg parms into their regs. 
*/ + + /* ARGNUM indexes the ARGVEC array in the order in which the arguments +@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + get_identifier (XSTR (orgfun, 0)), + build_function_type (tfom, NULL_TREE), + original_args_size.constant, args_size.constant, +- struct_value_size, +- targetm.calls.function_arg (args_so_far, +- function_arg_info::end_marker ()), +- valreg, ++ struct_value_size, call_cookie, valreg, + old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); + + if (flag_ipa_ra) +@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg)); + } + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + /* For calls to `setjmp', etc., inform function.cc:setjmp_warnings + that it should complain if nonvolatile values are live. For +diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc +index 3634a49de..7f2103ba6 100644 +--- a/gcc/config/nvptx/nvptx.cc ++++ b/gcc/config/nvptx/nvptx.cc +@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void)
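The bracketing the new hook establishes can be sketched as follows (our illustration; the state variable is invented, not from the patch):

  /* Hypothetical target use: between start_call_args and end_call_args,
     argument registers may be live, and the only call emitted by
     target-independent code is the call being expanded.  */
  static bool example_in_call_p;

  static void
  example_start_call_args (cumulative_args_t)
  {
    example_in_call_p = true;
  }

  static void
  example_end_call_args (cumulative_args_t)
  {
    example_in_call_p = false;
  }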
_service:tar_scm:0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch
Added
@@ -0,0 +1,48 @@
+From 3ed698858f0ebb12a99ed1cc12c038b533f64b2c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 25 Oct 2024 06:15:21 +0000
+Subject: [PATCH 150/188] LoongArch: Fix C23 (...) functions returning large
+ aggregates [PR114175]
+
+We were assuming TYPE_NO_NAMED_ARGS_STDARG_P functions don't have any
+named arguments and there is nothing to advance, but that is not the
+case for (...) functions returning by hidden reference, which have one
+such artificial argument. This is causing gcc.dg/c23-stdarg-6.c and
+gcc.dg/c23-stdarg-8.c to fail.
+
+Fix the issue by checking if arg.type is NULL, as r14-9503 explains.
+
+gcc/ChangeLog:
+
+    PR target/114175
+    * config/loongarch/loongarch.cc
+    (loongarch_setup_incoming_varargs): Only skip
+    loongarch_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P
+    functions if arg.type is NULL.
+---
+ gcc/config/loongarch/loongarch.cc | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c1dc30b61..1e3981e19 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -767,7 +767,14 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum,
+      argument. Advance a local copy of CUM past the last "real" named
+      argument, to find out how many registers are left over.  */
+   local_cum = *get_cumulative_args (cum);
+-  loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
++
++  /* For a C23 variadic function w/o any named argument, and w/o an
++     artificial argument for large return value, skip advancing args.
++     There is such an artificial argument iff arg.type is non-NULL
++     (PR 114175).  */
++  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
++      || arg.type != NULL_TREE)
++    loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
+
+   /* Found out how many registers we need to save.  */
+   gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
+--
+2.43.0
+
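The construct being fixed looks like this (our example in the spirit of gcc.dg/c23-stdarg-6.c, not copied from the testsuite):

  #include <stdarg.h>

  struct big { long x[8]; };   /* returned via hidden reference */

  /* C23: no named parameters, yet the hidden return pointer is one
     artificial named argument that must still be advanced past.  */
  struct big
  f (...)
  {
    va_list ap;
    va_start (ap);             /* C23 form, no second argument */
    struct big b = { { va_arg (ap, long) } };
    va_end (ap);
    return b;
  }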
_service:tar_scm:0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
Added
@@ -0,0 +1,490 @@ +From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:52:41 +0000 +Subject: PATCH 052/157 BackportSME Allow targets to add USEs to asms + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba + +Arm's SME has an array called ZA that for inline asm purposes +is effectively a form of special-purpose memory. It doesn't +have an associated storage type and so can't be passed and +returned in normal C/C++ objects. + +We'd therefore like "za" in a clobber list to mean that an inline +asm can read from and write to ZA. (Just reading or writing +individually is unlikely to be useful, but we could add syntax +for that too if necessary.) + +There is currently a TARGET_MD_ASM_ADJUST target hook that allows +targets to add clobbers to an asm instruction. This patch +extends that to allow targets to add USEs as well. + +gcc/ + * target.def (md_asm_adjust): Add a uses parameter. + * doc/tm.texi: Regenerate. + * cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust. + Handle any USEs created by the target. + (expand_asm_stmt): Likewise. + * recog.cc (asm_noperands): Handle asms with USEs. + (decode_asm_operands): Likewise. + * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses + parameter. + * config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise. + * config/arm/arm.cc (thumb1_md_asm_adjust): Likewise. + * config/avr/avr.cc (avr_md_asm_adjust): Likewise. + * config/cris/cris.cc (cris_md_asm_adjust): Likewise. + * config/i386/i386.cc (ix86_md_asm_adjust): Likewise. + * config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise. + * config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise. + * config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise. + * config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise. + * config/s390/s390.cc (s390_md_asm_adjust): Likewise. + * config/vax/vax.cc (vax_md_asm_adjust): Likewise. + * config/visium/visium.cc (visium_md_asm_adjust): Likewise. 
+--- + gcc/cfgexpand.cc | 37 +++++++++++++++++++--------- + gcc/config/arm/aarch-common-protos.h | 2 +- + gcc/config/arm/aarch-common.cc | 3 ++- + gcc/config/arm/arm.cc | 5 ++-- + gcc/config/avr/avr.cc | 1 + + gcc/config/cris/cris.cc | 6 +++-- + gcc/config/i386/i386.cc | 5 ++-- + gcc/config/mn10300/mn10300.cc | 3 ++- + gcc/config/nds32/nds32.cc | 4 +-- + gcc/config/pdp11/pdp11.cc | 6 +++-- + gcc/config/rs6000/rs6000.cc | 3 ++- + gcc/config/s390/s390.cc | 3 ++- + gcc/config/vax/vax.cc | 4 ++- + gcc/config/visium/visium.cc | 5 ++-- + gcc/doc/tm.texi | 5 ++-- + gcc/recog.cc | 20 ++++++++++----- + gcc/target.def | 5 ++-- + 17 files changed, 77 insertions(+), 40 deletions(-) + +diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc +index 4691355aa..5401a4ebd 100644 +--- a/gcc/cfgexpand.cc ++++ b/gcc/cfgexpand.cc +@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus) + auto_vec<rtx> input_rvec, output_rvec; + auto_vec<machine_mode> input_mode; + auto_vec<const char *> constraints; ++ auto_vec<rtx> use_rvec; + auto_vec<rtx> clobber_rvec; + HARD_REG_SET clobbered_regs; + CLEAR_HARD_REG_SET (clobbered_regs); +@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus) + + if (targetm.md_asm_adjust) + targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + asm_op = body; + nclobbers = clobber_rvec.length (); +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers)); ++ auto nuses = use_rvec.length (); ++ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers)); + +- XVECEXP (body, 0, 0) = asm_op; +- for (i = 0; i < nclobbers; i++) +- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rveci); ++ i = 0; ++ XVECEXP (body, 0, i++) = asm_op; ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ for (rtx clobber : clobber_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobber); + } + + emit_insn (body); +@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt) + maintaining source-level compatibility means automatically clobbering + the flags register. */ + rtx_insn *after_md_seq = NULL; ++ auto_vec<rtx> use_rvec; + if (targetm.md_asm_adjust) + after_md_seq + = targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + /* Do not allow the hook to change the output and input count, + lest it mess up the operand numbering. */ +@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt) + gcc_assert (input_rvec.length() == ninputs); + gcc_assert (constraints.length() == noutputs + ninputs); + +- /* But it certainly can adjust the clobbers. */ ++ /* But it certainly can adjust the uses and clobbers. */ ++ unsigned nuses = use_rvec.length (); + unsigned nclobbers = clobber_rvec.length (); + + /* Third pass checks for easy conflicts. */ +@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt) + ARGVEC CONSTRAINTS OPNAMES)) + If there is more than one, put them inside a PARALLEL. */ + +- if (noutputs == 0 && nclobbers == 0) ++ if (noutputs == 0 && nuses == 0 && nclobbers == 0) + { + /* No output operands: put in a raw ASM_OPERANDS rtx. 
*/ + if (nlabels > 0) +@@ -3535,7 +3542,7 @@ expand_asm_stmt (gasm *stmt) + else + emit_insn (body); + } +- else if (noutputs == 1 && nclobbers == 0) ++ else if (noutputs == 1 && nuses == 0 && nclobbers == 0) + { + ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints0; + if (nlabels > 0) +@@ -3551,7 +3558,8 @@ expand_asm_stmt (gasm *stmt) + if (num == 0) + num = 1; + +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers)); ++ body = gen_rtx_PARALLEL (VOIDmode, ++ rtvec_alloc (num + nuses + nclobbers)); + + /* For each output operand, store a SET. */ + for (i = 0; i < noutputs; ++i) +@@ -3578,6 +3586,11 @@ expand_asm_stmt (gasm *stmt) + if (i == 0) + XVECEXP (body, 0, i++) = obody; + ++ /* Add the uses specified by the target hook. No checking should ++ be needed since this doesn't come directly from user code. */ ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ + /* Store (clobber REG) for each clobbered register specified. */ + for (unsigned j = 0; j < nclobbers; ++j) + { +diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h +index ae0465159..3b525c174 100644 +--- a/gcc/config/arm/aarch-common-protos.h ++++ b/gcc/config/arm/aarch-common-protos.h +@@ -149,7 +149,7 @@ struct cpu_cost_table + + rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, ++ vec<const char *> &constraints, vec<rtx> &, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, + location_t loc); + +diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc +index 04a53d750..365cfc140 100644 +--- a/gcc/config/arm/aarch-common.cc ++++ b/gcc/config/arm/aarch-common.cc +@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) + rtx_insn * + arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> & /*clobbers*/, ++ vec<const char *> &constraints, ++ vec<rtx> & /*uses*/, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/, location_t loc) + { + bool saw_asm_flag = false; +diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc +index b700c23b8..c72e9c0b0 100644 +--- a/gcc/config/arm/arm.cc ++++ b/gcc/config/arm/arm.cc +@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
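For readers coming to this patch cold, here is a user-level sketch of where the new "uses" vector matters. This is a hypothetical example: accepting "za" in a clobber list depends on the rest of the SME series, and the patch above only adds the generic hook plumbing that such a clobber expands to.

  /* Sketch only: assumes an SME-enabled compiler and the SME attributes
     added later in this series.  The point is that "za" in the clobber
     list tells GCC the asm may read and write the ZA array state, which
     the backend models by attaching USE (and clobber) rtxes to the asm
     insn via the extended md_asm_adjust hook.  */
  void
  touch_za (void)
  {
    __asm__ volatile ("" /* some instruction updating ZA */
                      ::: "za", "memory");
  }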
View file
_service:tar_scm:0151-LoongArch-Remove-unused-useless-definitions.patch
Added
@@ -0,0 +1,123 @@ +From 6ee300fd31e000efba141ed8806e56bd03826197 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 15 Mar 2024 09:30:25 +0800 +Subject: PATCH 151/188 LoongArch: Remove unused/useless definitions. + +This patch removes some unnecessary definitions of target hook functions +according to the documentation of GCC. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_cfun_has_cprestore_slot_p): Delete. + (loongarch_adjust_insn_length): Delete. + (current_section_name): Delete. + (loongarch_split_symbol_type): Delete. + * config/loongarch/loongarch.cc + (loongarch_case_values_threshold): Delete. + (loongarch_spill_class): Delete. + (TARGET_OPTAB_SUPPORTED_P): Delete. + (TARGET_CASE_VALUES_THRESHOLD): Delete. + (TARGET_SPILL_CLASS): Delete. +--- + gcc/config/loongarch/loongarch-protos.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 26 ------------------------- + 2 files changed, 31 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 87b94e8b0..3dac20279 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -93,7 +93,6 @@ extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); + extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); +-extern bool loongarch_cfun_has_cprestore_slot_p (void); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_vec_cmp (rtx *); +@@ -135,7 +134,6 @@ extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, + unsigned int, + machine_mode); +-extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, + const char *, + const char *); +@@ -157,7 +155,6 @@ extern bool loongarch_global_symbol_noweak_p (const_rtx); + extern bool loongarch_weak_symbol_p (const_rtx); + extern bool loongarch_symbol_binds_local_p (const_rtx); + +-extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); +@@ -198,8 +195,6 @@ extern bool loongarch_epilogue_uses (unsigned int); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool loongarch_split_symbol_type (enum loongarch_symbol_type); + +-typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); +- + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1e3981e19..903c0d4ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10812,23 +10812,6 @@ loongarch_expand_vec_cmp (rtx operands) + return true; + } + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +-/* Implement TARGET_SPILL_CLASS. 
*/ +- +-static reg_class_t +-loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) +-{ +- return NO_REGS; +-} +- + /* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + + /* This function is equivalent to default_promote_function_mode_always_promote +@@ -11283,9 +11266,6 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + +-#undef TARGET_OPTAB_SUPPORTED_P +-#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p +- + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +@@ -11355,18 +11335,12 @@ loongarch_asm_code_end (void) + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + + #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS + #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +-#undef TARGET_SPILL_CLASS +-#define TARGET_SPILL_CLASS loongarch_spill_class +- + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK +-- +2.43.0 +
View file
_service:tar_scm:0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
Added
@@ -0,0 +1,998 @@ +From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:47:46 +0100 +Subject: PATCH 053/157 BackportSME New compact syntax for insn and + insn_split in Machine Descriptions. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217 + +This patch adds support for a compact syntax for specifying constraints in +instruction patterns. Credit for the idea goes to Richard Earnshaw. + +With this new syntax we want a clean break from the current limitations to make +something that is hopefully easier to use and maintain. + +The idea behind this compact syntax is that often times it's quite hard to +correlate the entries in the constrains list, attributes and instruction lists. + +One has to count and this often is tedious. Additionally when changing a single +line in the insn multiple lines in a diff change, making it harder to see what's +going on. + +This new syntax takes into account many of the common things that are done in MD +files. It's also worth saying that this version is intended to deal with the +common case of a string based alternatives. For C chunks we have some ideas +but those are not intended to be addressed here. + +It's easiest to explain with an example: + +normal syntax: + +(define_insn_and_split "*movsi_aarch64" + (set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" + "@ + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %1 + # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); + ldr\\t%w0, %1 + ldr\\t%s0, %1 + str\\t%w1, %0 + str\\t%s1, %0 + adrp\\t%x0, %A1\;ldr\\t%w0, %x0, %L1 + adr\\t%x0, %c1 + adrp\\t%x0, %A1 + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1 + * return aarch64_output_scalar_simd_mov_immediate (operands1, SImode);" + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" + (const_int 0) + "{ + aarch64_expand_mov_immediate (operands0, operands1); + DONE; + }" + ;; The "mov_imm" type for CNT is just a placeholder. + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, + load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") + +) + +New syntax: + +(define_insn_and_split "*movsi_aarch64" + (set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "aarch64_mov_operand")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" + {@ cons: =0, 1; attrs: type, arch, length + r , r ; mov_reg , * , 4 mov\t%w0, %w1 + k , r ; mov_reg , * , 4 ^ + r , k ; mov_reg , * , 4 ^ + r , M ; mov_imm , * , 4 mov\t%w0, %1 + r , n ; mov_imm , * ,16 # + /* The "mov_imm" type for CNT is just a placeholder. 
*/ + r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); + r , m ; load_4 , * , 4 ldr\t%w0, %1 + w , m ; load_4 , fp , 4 ldr\t%s0, %1 + m , rZ ; store_4 , * , 4 str\t%w1, %0 + m , w ; store_4 , fp , 4 str\t%s1, %0 + r , Usw; load_4 , * , 8 adrp\t%x0, %A1;ldr\t%w0, %x0, %L1 + r , Usa; adr , * , 4 adr\t%x0, %c1 + r , Ush; adr , * , 4 adrp\t%x0, %A1 + w , rZ ; f_mcr , fp , 4 fmov\t%s0, %w1 + r , w ; f_mrc , fp , 4 fmov\t%w0, %s1 + w , w ; fmov , fp , 4 fmov\t%s0, %s1 + w , Ds ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, SImode); + } + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" + (const_int 0) + { + aarch64_expand_mov_immediate (operands0, operands1); + DONE; + } +) + +The main syntax rules are as follows (See docs for full rules): + - Template must start with "{@" and end with "}" to use the new syntax. + - "{@" is followed by a layout in parentheses which is "cons:" followed by + a list of match_operand/match_scratch IDs, then a semicolon, then the + same for attributes ("attrs:"). Both sections are optional (so you can + use only cons, or only attrs, or both), and cons must come before attrs + if present. + - Each alternative begins with any amount of whitespace. + - Following the whitespace is a comma-separated list of constraints and/or + attributes within brackets , with sections separated by a semicolon. + - Following the closing '' is any amount of whitespace, and then the actual + asm output. + - Spaces are allowed in the list (they will simply be removed). + - All alternatives should be specified: a blank list should be + ",,", ",,;," etc., not "" or "" (however genattr may segfault if + you leave certain attributes empty, I have found). + - The actual constraint string in the match_operand or match_scratch, and + the attribute string in the set_attr, must be blank or an empty string + (you can't combine the old and new syntaxes). + - The common idion * return can be shortened by using <<. + - Any unexpanded iterators left during processing will result in an error at + compile time. If for some reason <> is needed in the output then these + must be escaped using \. + - Within an {@ block both multiline and singleline C comments are allowed, but + when used outside of a C block they must be the only non-whitespace blocks on + the line + - Inside an {@ block any unexpanded iterators will result in a compile time + fault instead of incorrect assembly being generated at runtime. If the + literal <> is needed in the output this needs to be escaped with \<\>. + - This check is not performed inside C blocks (lines starting with *). + - Instead of copying the previous instruction again in the next pattern, one + can use ^ to refer to the previous asm string. + +This patch works by blindly transforming the new syntax into the old syntax, +so it doesn't do extensive checking. However, it does verify that: + - The correct number of constraints/attributes are specified. + - You haven't mixed old and new syntax. + - The specified operand IDs/attribute names actually exist. + - You don't have duplicate cons + +If something goes wrong, it may write invalid constraints/attributes/template +back into the rtx. But this shouldn't matter because error_at will cause the +program to fail on exit anyway. 
+ +Because this transformation occurs as early as possible (before patterns are +queued), the rest of the compiler can completely ignore the new syntax and +assume that the old syntax will always be used. + +This doesn't seem to have any measurable effect on the runtime of gen* +programs. + +gcc/ChangeLog: + + * gensupport.cc (class conlist, add_constraints, add_attributes, + skip_spaces, expect_char, preprocess_compact_syntax, + parse_section_layout, parse_section, convert_syntax): New. + (process_rtx): Check for conversion. + * genoutput.cc (process_template): Check for unresolved iterators. + (class data): Add compact_syntax_p. + (gen_insn): Use it. + * gensupport.h (compact_syntax): New. + (hash-set.h): Include. + * doc/md.texi: Document it. + +Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com> +--- + gcc/doc/md.texi | 163 +++++++++++++++ + gcc/genoutput.cc | 48 ++++- + gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/gensupport.h | 3 + + 4 files changed, 709 insertions(+), 3 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 3b544358b..04ace8f7f 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -27,6 +27,7 @@ See the next chapter for information on the C header file. + from such an insn. + * Output Statement:: For more generality, write C code to output + the assembler code. ++* Compact Syntax:: Compact syntax for writing machine descriptors. + * Predicates:: Controlling what kinds of operands can be used + for an insn. + * Constraints:: Fine-tuning operand selection. +@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template: + @end group + @end smallexample + ++@node Compact Syntax ++@section Compact Syntax ++@cindex compact syntax ++ ++When a @code{define_insn} or @code{define_insn_and_split} has multiple ++alternatives it may be beneficial to use the compact syntax when specifying ++alternatives.
View file
_service:tar_scm:0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch
Added
@@ -0,0 +1,110 @@
+From d569e34b29faee3658014b3900e9553a4880dac0 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 15 Mar 2024 09:30:26 +0800
+Subject: [PATCH 152/188] LoongArch: Change loongarch_expand_vec_cmp()'s
+ return type from bool to void.
+
+This function always returns true at the end of its implementation,
+so the return value is useless.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_cmp<mode><mode256_i>): Remove checking
+	of loongarch_expand_vec_cmp()'s return value.
+	(vec_cmpu<ILASX:mode><mode256_i>): Ditto.
+	* config/loongarch/lsx.md (vec_cmp<mode><mode_i>): Ditto.
+	(vec_cmpu<ILSX:mode><mode_i>): Ditto.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_vec_cmp): Change loongarch_expand_vec_cmp()'s return
+	type from bool to void.
+	* config/loongarch/loongarch.cc (loongarch_expand_vec_cmp): Ditto.
+---
+ gcc/config/loongarch/lasx.md            | 6 ++----
+ gcc/config/loongarch/loongarch-protos.h | 2 +-
+ gcc/config/loongarch/loongarch.cc       | 3 +--
+ gcc/config/loongarch/lsx.md             | 6 ++----
+ 4 files changed, 6 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index f3b5ea373..45a0a8cc8 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -1378,8 +1378,7 @@
+	   (match_operand:LASX 3 "register_operand")]))]
+   "ISA_HAS_LASX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+@@ -1390,8 +1389,7 @@
+	   (match_operand:ILASX 3 "register_operand")]))]
+   "ISA_HAS_LASX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 3dac20279..8523da8d6 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -95,7 +95,7 @@ extern void loongarch_split_lsx_fill_d (rtx, rtx);
+ extern const char *loongarch_output_move (rtx, rtx);
+ #ifdef RTX_CODE
+ extern void loongarch_expand_scc (rtx *);
+-extern bool loongarch_expand_vec_cmp (rtx *);
++extern void loongarch_expand_vec_cmp (rtx *);
+ extern void loongarch_expand_conditional_branch (rtx *);
+ extern void loongarch_expand_conditional_move (rtx *);
+ extern void loongarch_expand_conditional_trap (rtx);
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 903c0d4ef..dbb318660 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10803,13 +10803,12 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+ }
+
+ /* Expand integer vector comparison */
+-bool
++void
+ loongarch_expand_vec_cmp (rtx *operands)
+ {
+
+   rtx_code code = GET_CODE (operands[1]);
+   loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]);
+-  return true;
+ }
+
+ /* Implement TARGET_PROMOTE_FUNCTION_MODE.  */
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index d5aa3f46f..dc81093e9 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -518,8 +518,7 @@
+	   (match_operand:LSX 3 "register_operand")]))]
+   "ISA_HAS_LSX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+@@ -530,8 +529,7 @@
+	   (match_operand:ILSX 3 "register_operand")]))]
+   "ISA_HAS_LSX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+--
+2.43.0
+
View file
_service:tar_scm:0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
Added
@@ -0,0 +1,104 @@
+From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Wed, 13 Sep 2023 14:50:30 +0100
+Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new
+ compact syntax
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7
+
+Hi all,
+
+this is to add support to the new compact pattern syntax for the case
+where the constraints do appear unsorted like:
+
+(define_insn "*<optab>si3_insn_uxtw"
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI (SHIFT_no_rotate:SI
+	  (match_operand:SI 1 "register_operand")
+	  (match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))]
+  ""
+  {@ [cons: =0, 2, 1]
+     [r, Uss, r] <shift>\\t%w0, %w1, %2
+     [r, r, r]   <shift>\\t%w0, %w1, %w2
+  }
+  [(set_attr "type" "bfx,shift_reg")]
+)
+
+Best Regards
+
+  Andrea
+
+gcc/Changelog
+
+2023-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gensupport.cc (convert_syntax): Updated to support unordered
+	constraints in compact syntax.
+---
+ gcc/gensupport.cc | 32 ++++++++++++++++----------------
+ 1 file changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
+index 23c61dcdd..97c614850 100644
+--- a/gcc/gensupport.cc
++++ b/gcc/gensupport.cc
+@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc)
+
+   parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+-  /* Check for any duplicate cons entries and sort based on i.  */
+-  for (auto e : tconvec)
+-    {
+-      unsigned idx = e.idx;
+-      if (idx >= convec.size ())
+-	convec.resize (idx + 1);
+-
+-      if (convec[idx].idx >= 0)
+-	fatal_at (loc, "duplicate cons number found: %d", idx);
+-      convec[idx] = e;
+-    }
+-  tconvec.clear ();
+-
+   if (*templ != ']')
+     {
+       if (*templ == ';')
+@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc)
+	   new_templ += '\n';
+	   new_templ.append (buffer);
+	   /* Parse the constraint list, then the attribute list.  */
+-	   if (convec.size () > 0)
+-	     parse_section (&templ, convec.size (), alt_no, convec, loc,
++	   if (tconvec.size () > 0)
++	     parse_section (&templ, tconvec.size (), alt_no, tconvec, loc,
+			    "constraint");
+
+	   if (attrvec.size () > 0)
+	     {
+-	       if (convec.size () > 0 && !expect_char (&templ, ';'))
++	       if (tconvec.size () > 0 && !expect_char (&templ, ';'))
+		 fatal_at (loc, "expected `;' to separate constraints "
+			   "and attributes in alternative %d", alt_no);
+
+@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc)
+       ++alt_no;
+     }
+
++  /* Check for any duplicate cons entries and sort based on i.  */
++  for (auto e : tconvec)
++    {
++      unsigned idx = e.idx;
++      if (idx >= convec.size ())
++	convec.resize (idx + 1);
++
++      if (convec[idx].idx >= 0)
++	fatal_at (loc, "duplicate cons number found: %d", idx);
++      convec[idx] = e;
++    }
++  tconvec.clear ();
++
+   /* Write the constraints and attributes into their proper places.  */
+   if (convec.size () > 0)
+     add_constraints (x, loc, convec);
+--
+2.33.0
+
View file
_service:tar_scm:0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch
Added
@@ -0,0 +1,104 @@
+From 6c4a2fbdabab053a2a0fb1041e3ffccc3d853c97 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 15 Mar 2024 09:30:27 +0800
+Subject: [PATCH 153/188] LoongArch: Combine UNITS_PER_FP_REG and
+ UNITS_PER_FPREG macros.
+
+These macros are completely the same in definition, so we can keep the
+former and eliminate the latter.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_hard_regno_mode_ok_uncached): Combine UNITS_PER_FP_REG and
+	UNITS_PER_FPREG macros.
+	(loongarch_hard_regno_nregs): Ditto.
+	(loongarch_class_max_nregs): Ditto.
+	(loongarch_get_separate_components): Ditto.
+	(loongarch_process_components): Ditto.
+	* config/loongarch/loongarch.h (UNITS_PER_FPREG): Ditto.
+	(UNITS_PER_HWFPVALUE): Ditto.
+	(UNITS_PER_FPVALUE): Ditto.
+---
+ gcc/config/loongarch/loongarch.cc | 10 +++++-----
+ gcc/config/loongarch/loongarch.h  |  7 ++-----
+ 2 files changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index dbb318660..8d9cda165 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6773,7 +6773,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
+	 and TRUNC.  There's no point allowing sizes smaller than a word,
+	 because the FPU has no appropriate load/store instructions.  */
+       if (mclass == MODE_INT)
+-	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
++	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FP_REG;
+     }
+
+   return false;
+@@ -6816,7 +6816,7 @@ loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
+       if (LASX_SUPPORTED_MODE_P (mode))
+	 return 1;
+
+-      return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
++      return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
+     }
+
+   /* All other registers are word-sized.  */
+@@ -6851,7 +6851,7 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
+	   else if (LSX_SUPPORTED_MODE_P (mode))
+	     size = MIN (size, UNITS_PER_LSX_REG);
+	   else
+-	     size = MIN (size, UNITS_PER_FPREG);
++	     size = MIN (size, UNITS_PER_FP_REG);
+	 }
+       left &= ~reg_class_contents[FP_REGS];
+     }
+@@ -8227,7 +8227,7 @@ loongarch_get_separate_components (void)
+	  if (IMM12_OPERAND (offset))
+	    bitmap_set_bit (components, regno);
+
+-	  offset -= UNITS_PER_FPREG;
++	  offset -= UNITS_PER_FP_REG;
+	}
+
+   /* Don't mess with the hard frame pointer.  */
+@@ -8306,7 +8306,7 @@ loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+	 if (bitmap_bit_p (components, regno))
+	   loongarch_save_restore_reg (mode, regno, offset, fn);
+
+-	 offset -= UNITS_PER_FPREG;
++	 offset -= UNITS_PER_FP_REG;
+       }
+ }
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 8bcdb8729..698e42aec 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -138,19 +138,16 @@ along with GCC; see the file COPYING3.  If not see
+ /* Width of a LASX vector register in bits.  */
+ #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT)
+
+-/* For LARCH, width of a floating point register.  */
+-#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+-
+ /* The largest size of value that can be held in floating-point
+    registers and moved with a single instruction.  */
+ #define UNITS_PER_HWFPVALUE \
+-  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FPREG)
++  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG)
+
+ /* The largest size of value that can be held in floating-point
+    registers.  */
+ #define UNITS_PER_FPVALUE \
+   (TARGET_SOFT_FLOAT ? 0 \
+-   : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \
++   : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
+    : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+
+ /* The number of bytes in a double.  */
+--
+2.43.0
+
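The formula this patch standardises on is worth a worked example. Below is a minimal stand-alone sketch of the ceiling division in loongarch_hard_regno_nregs, with illustrative register widths (8 bytes under -mdouble-float, 4 under -msingle-float); the helper name is made up:

  #include <stdio.h>

  /* Mirror of the ceiling division in loongarch_hard_regno_nregs:
     how many FP registers a value of MODE_SIZE bytes occupies.  */
  static unsigned
  fp_nregs (unsigned mode_size, unsigned units_per_fp_reg)
  {
    return (mode_size + units_per_fp_reg - 1) / units_per_fp_reg;
  }

  int
  main (void)
  {
    printf ("%u\n", fp_nregs (8, 8));  /* DFmode, 8-byte FPRs: 1 reg  */
    printf ("%u\n", fp_nregs (8, 4));  /* DFmode, 4-byte FPRs: 2 regs */
    return 0;
  }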
View file
_service:tar_scm:0154-Backport-SME-recog-Support-space-in-cons.patch
Added
@@ -0,0 +1,49 @@
+From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001
+From: Andrea Corallo <andrea.corallo@arm.com>
+Date: Fri, 15 Sep 2023 10:23:02 +0200
+Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons"
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d
+
+Hi all,
+
+this is to allow for spaces before "cons:" in the definitions of
+patterns using the new compact syntax, ex:
+
+(define_insn "aarch64_simd_dup<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+	(vec_duplicate:VDQ_I
+	  (match_operand:<VEL> 1 "register_operand")))]
+  "TARGET_SIMD"
+  {@ [cons: =0 , 1  ; attrs: type ]
+     [w    , w  ; neon_dup<q>     ] dup\t%0.<Vtype>, %1.<Vetype>[0]
+     [w    , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
+  }
+)
+
+gcc/Changelog
+
+2023-09-20  Andrea Corallo  <andrea.corallo@arm.com>
+
+	* gensupport.cc (convert_syntax): Skip spaces before "cons:"
+	in new compact pattern syntax.
+---
+ gcc/gensupport.cc | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
+index 97c614850..3d7a6d4fd 100644
+--- a/gcc/gensupport.cc
++++ b/gcc/gensupport.cc
+@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc)
+   if (!expect_char (&templ, '['))
+     fatal_at (loc, "expecing `[' to begin section list");
+
++  skip_spaces (&templ);
++
+   parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+   if (*templ != ']')
+--
+2.33.0
+
View file
_service:tar_scm:0154-LoongArch-Fix-a-typo-PR-114407.patch
Added
@@ -0,0 +1,30 @@
+From 72f18deb0b8e59cc23f25cb99b59a25a0a1d99c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 21 Mar 2024 04:01:17 +0800
+Subject: [PATCH 154/188] LoongArch: Fix a typo [PR 114407]
+
+gcc/ChangeLog:
+
+	PR target/114407
+	* config/loongarch/loongarch-opts.cc (loongarch_config_target):
+	Fix typo in diagnostic message, enabing -> enabling.
+---
+ gcc/config/loongarch/loongarch-opts.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index 2ea3972d1..bdecfaf49 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -362,7 +362,7 @@ config_target_isa:
+       gcc_assert (constrained.simd);
+
+       inform (UNKNOWN_LOCATION,
+-	      "enabing %qs promotes %<%s%s%> to %<%s%s%>",
++	      "enabling %qs promotes %<%s%s%> to %<%s%s%>",
+	      loongarch_isa_ext_strings[t.isa.simd],
+	      OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu],
+	      OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]);
+--
+2.43.0
+
View file
_service:tar_scm:0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
Added
@@ -0,0 +1,164 @@ +From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:18 +0000 +Subject: PATCH 056/157 BackportSME aarch64: Generalise + require_immediate_lane_index + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106 + +require_immediate_lane_index previously hard-coded the assumption +that the group size is determined by the argument immediately before +the index. However, for SME, there are cases where it should be +determined by an earlier argument instead. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h: + (function_checker::require_immediate_lane_index): Add an argument + for the index of the indexed vector argument. + * config/aarch64/aarch64-sve-builtins.cc + (function_checker::require_immediate_lane_index): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (ternary_bfloat_lane_base::check): Update accordingly. + (ternary_qq_lane_base::check): Likewise. + (binary_lane_def::check): Likewise. + (binary_long_lane_def::check): Likewise. + (ternary_lane_def::check): Likewise. + (ternary_lane_rotate_def::check): Likewise. + (ternary_long_lane_def::check): Likewise. + (ternary_qq_lane_rotate_def::check): Likewise. +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 16 ++++++++-------- + gcc/config/aarch64/aarch64-sve-builtins.cc | 18 ++++++++++++------ + gcc/config/aarch64/aarch64-sve-builtins.h | 3 ++- + 3 files changed, 22 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index f57f92698..4fa4181b9 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3, N); ++ return c.require_immediate_lane_index (3, 2, N); + } + }; + +@@ -956,7 +956,7 @@ struct ternary_qq_lane_base + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3, 4); ++ return c.require_immediate_lane_index (3, 0); + } + }; + +@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (2); ++ return c.require_immediate_lane_index (2, 1); + } + }; + SHAPE (binary_lane) +@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (2); ++ return c.require_immediate_lane_index (2, 1); + } + }; + SHAPE (binary_long_lane) +@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3); ++ return c.require_immediate_lane_index (3, 2); + } + }; + SHAPE (ternary_lane) +@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return (c.require_immediate_lane_index (3, 2) ++ return (c.require_immediate_lane_index (3, 2, 2) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } + }; +@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3); ++ return c.require_immediate_lane_index (3, 2); + } + }; + SHAPE 
(ternary_long_lane) +@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return (c.require_immediate_lane_index (3, 4) ++ return (c.require_immediate_lane_index (3, 0) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 91af96687..7924cdf0f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type) + return false; + } + +-/* Check that argument REL_ARGNO is suitable for indexing argument +- REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts +- from the end of the predication arguments. */ ++/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into ++ groups of GROUP_SIZE elements. Return true if argument REL_ARGNO is ++ a suitable constant index for selecting one of these groups. The ++ selection happens within a 128-bit quadword, rather than the whole vector. ++ ++ REL_ARGNO and REL_VEC_ARGNO count from the end of the predication ++ arguments. */ + bool + function_checker::require_immediate_lane_index (unsigned int rel_argno, ++ unsigned int rel_vec_argno, + unsigned int group_size) + { + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + +- /* Get the type of the previous argument. tree_argument_type wants a +- 1-based number, whereas ARGNO is 0-based. */ +- machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); ++ /* Get the type of the vector argument. tree_argument_type wants a ++ 1-based number, whereas VEC_ARGNO is 0-based. */ ++ unsigned int vec_argno = m_base_arg + rel_vec_argno; ++ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1)); + gcc_assert (VECTOR_MODE_P (mode)); + unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); + return require_immediate_range (rel_argno, 0, nlanes - 1); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 52994cde0..824c31cd7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -463,7 +463,8 @@ public: + bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, + HOST_WIDE_INT); + bool require_immediate_enum (unsigned int, tree); +- bool require_immediate_lane_index (unsigned int, unsigned int = 1); ++ bool require_immediate_lane_index (unsigned int, unsigned int, ++ unsigned int = 1); + bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, + HOST_WIDE_INT, HOST_WIDE_INT); + bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); +-- +2.33.0 +
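The range check behind the new interface is simple arithmetic: within a 128-bit quadword, an index that selects groups of GROUP_SIZE elements of a given bit width can range over 128 / (group_size * unit_bitsize) lanes. A stand-alone sketch with illustrative widths (not taken from any specific intrinsic):

  #include <stdio.h>

  /* Mirrors the computation in require_immediate_lane_index: lane
     selection happens within a 128-bit quadword, so a constant lane
     index must lie in [0, nlanes - 1].  */
  static unsigned
  max_lane_index (unsigned group_size, unsigned unit_bitsize)
  {
    unsigned nlanes = 128 / (group_size * unit_bitsize);
    return nlanes - 1;
  }

  int
  main (void)
  {
    printf ("%u\n", max_lane_index (1, 32));  /* 32-bit lanes: 0..3      */
    printf ("%u\n", max_lane_index (2, 16));  /* pairs of 16-bit: 0..3   */
    printf ("%u\n", max_lane_index (1, 16));  /* 16-bit lanes: 0..7      */
    return 0;
  }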
View file
_service:tar_scm:0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch
Added
@@ -0,0 +1,68 @@
+From e27123a020e7bf0845a9804a4b09fe4ce57992f0 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 6 Feb 2024 17:49:50 +0800
+Subject: [PATCH 155/188] testsuite: Add a test case for negating FP vectors
+ containing zeros
+
+Recently I've fixed two wrong FP vector negate implementations which
+caused wrong sign bits in zeros in targets (r14-8786 and r14-8801).  To
+prevent a similar issue from happening again, add a test case.
+
+Tested on x86_64 (with SSE2, AVX, AVX2, and AVX512F), AArch64, MIPS
+(with MSA), LoongArch (with LSX and LASX).
+
+gcc/testsuite:
+
+	* gcc.dg/vect/vect-neg-zero.c: New test.
+---
+ gcc/testsuite/gcc.dg/vect/vect-neg-zero.c | 38 +++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+new file mode 100644
+index 000000000..21fa00cfa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+@@ -0,0 +1,38 @@
++/* { dg-add-options ieee } */
++/* { dg-additional-options "-fno-associative-math -fsigned-zeros" } */
++
++double x[4] = {-0.0, 0.0, -0.0, 0.0};
++float y[8] = {-0.0, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0};
++
++static __attribute__ ((always_inline)) inline void
++test (int factor)
++{
++  double a[4];
++  float b[8];
++
++  asm ("" ::: "memory");
++
++  for (int i = 0; i < 2 * factor; i++)
++    a[i] = -x[i];
++
++  for (int i = 0; i < 4 * factor; i++)
++    b[i] = -y[i];
++
++#pragma GCC novector
++  for (int i = 0; i < 2 * factor; i++)
++    if (__builtin_signbit (a[i]) == __builtin_signbit (x[i]))
++      __builtin_abort ();
++
++#pragma GCC novector
++  for (int i = 0; i < 4 * factor; i++)
++    if (__builtin_signbit (b[i]) == __builtin_signbit (y[i]))
++      __builtin_abort ();
++}
++
++int
++main (void)
++{
++  test (1);
++  test (2);
++  return 0;
++}
+--
+2.43.0
+
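The invariant the test enforces can be shown in a few lines of portable C: IEEE 754 negation flips the sign bit even for zeros, although the two zeros compare equal, which is exactly why the test inspects __builtin_signbit rather than using ==:

  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    double z = 0.0;
    /* Negating a zero must flip its sign bit...  */
    printf ("signbit(0.0) = %d, signbit(-0.0) = %d\n",
            signbit (z) != 0, signbit (-z) != 0);
    /* ...even though +0.0 and -0.0 compare equal, so == cannot
       distinguish a correct negation from a broken one.  */
    printf ("0.0 == -0.0: %d\n", 0.0 == -0.0);
    return 0;
  }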
View file
_service:tar_scm:0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
Added
@@ -0,0 +1,469 @@ +From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001 +From: Christophe Lyon <christophe.lyon@arm.com> +Date: Fri, 11 Mar 2022 16:21:02 +0000 +Subject: PATCH 057/157 BackportSME aarch64: Add backend support for DFP + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22 + +This patch updates the aarch64 backend as needed to support DFP modes +(SD, DD and TD). + +Changes v1->v2: + +* Drop support for DFP modes in + aarch64_gen_{load||store}wb_pair as these are only used in + prologue/epilogue where DFP modes are not used. Drop the + changes to the corresponding patterns in aarch64.md, and + useless GPF_PAIR iterator. + +* In aarch64_reinterpret_float_as_int, handle DDmode the same way + as DFmode (needed in case the representation of the + floating-point value can be loaded using mov/movk. + +* In aarch64_float_const_zero_rtx_p, reject constants with DFP + mode: when X is zero, the callers want to emit either '0' or + 'zr' depending on the context, which is not the way 0.0 is + represented in DFP mode (in particular fmov d0, #0 is not right + for DFP). + +* In aarch64_legitimate_constant_p, accept DFP + +2022-03-31 Christophe Lyon <christophe.lyon@arm.com> + + gcc/ + * config/aarch64/aarch64.cc + (aarch64_split_128bit_move): Handle DFP modes. + (aarch64_mode_valid_for_sched_fusion_p): Likewise. + (aarch64_classify_address): Likewise. + (aarch64_legitimize_address_displacement): Likewise. + (aarch64_reinterpret_float_as_int): Likewise. + (aarch64_float_const_zero_rtx_p): Likewise. + (aarch64_can_const_movi_rtx_p): Likewise. + (aarch64_anchor_offset): Likewise. + (aarch64_secondary_reload): Likewise. + (aarch64_rtx_costs): Likewise. + (aarch64_legitimate_constant_p): Likewise. + (aarch64_gimplify_va_arg_expr): Likewise. + (aapcs_vfp_sub_candidate): Likewise. + (aarch64_vfp_is_call_or_return_candidate): Likewise. + (aarch64_output_scalar_simd_mov_immediate): Likewise. + (aarch64_gen_adjusted_ldpstp): Likewise. + (aarch64_scalar_mode_supported_p): Accept DFP modes if enabled. + * config/aarch64/aarch64.md + (movsf_aarch64): Use SFD iterator and rename into + mov<mode>_aarch64. + (movdf_aarch64): Use DFD iterator and rename into + mov<mode>_aarch64. + (movtf_aarch64): Use TFD iterator and rename into + mov<mode>_aarch64. + (split pattern for move TF mode): Use TFD iterator. + * config/aarch64/iterators.md + (GPF_TF_F16_MOV): Add DFP modes. + (SFD, DFD, TFD): New iterators. + (GPF_TF): Add DFP modes. + (TX, DX, DX2): Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++----------- + gcc/config/aarch64/aarch64.md | 34 +++++++------- + gcc/config/aarch64/iterators.md | 24 +++++++--- + 3 files changed, 89 insertions(+), 51 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 055b436b1..02210ed13 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src) + + machine_mode mode = GET_MODE (dst); + +- gcc_assert (mode == TImode || mode == TFmode); ++ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + +@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) + { + return mode == SImode || mode == DImode + || mode == SFmode || mode == DFmode ++ || mode == SDmode || mode == DDmode + || (aarch64_vector_mode_supported_p (mode) + && (known_eq (GET_MODE_SIZE (mode), 8) + || (known_eq (GET_MODE_SIZE (mode), 16) +@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info, + vec_flags &= ~VEC_PARTIAL; + + /* On BE, we use load/store pair for all large int mode load/stores. +- TI/TFmode may also use a load/store pair. */ ++ TI/TF/TDmode may also use a load/store pair. */ + bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); + bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP + || type == ADDR_QUERY_LDP_STP_N + || mode == TImode + || mode == TFmode ++ || mode == TDmode + || (BYTES_BIG_ENDIAN && advsimd_struct_p)); + /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode + corresponds to the actual size of the memory being loaded/stored and the +@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = op1; + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset +@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + When performing the check for pairs of X registers i.e. LDP/STP + pass down DImode since that is the natural size of the LDP/STP + instruction memory accesses. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) + && (aarch64_offset_9bit_signed_unscaled_p (mode, offset) + || offset_12bit_unsigned_scaled_p (mode, offset))); +@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = XEXP (XEXP (x, 1), 1); + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset + Q: 9-bit signed offset + We conservatively require an offset representable in either mode. 
+ */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (mode, offset) + && aarch64_offset_9bit_signed_unscaled_p (mode, offset)); + +@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2, + offset. Use 4KB range for 1- and 2-byte accesses and a 16KB + range otherwise to increase opportunities for sharing the base + address of different sizes. Unaligned accesses use the signed +- 9-bit range, TImode/TFmode use the intersection of signed ++ 9-bit range, TImode/TFmode/TDmode use the intersection of signed + scaled 7-bit and signed 9-bit offset. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100; + else if ((const_offset & (size - 1)) != 0) + second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100; +@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + +- if (mode == DFmode) ++ if (mode == DFmode || mode == DDmode) + { + int order = BYTES_BIG_ENDIAN ? 1 : 0; + ival = zext_hwi (resorder, 32); +@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x) + return false; + } + +-/* Return TRUE if rtx X is immediate constant 0.0 */ ++/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal ++ Floating Point). */ + bool + aarch64_float_const_zero_rtx_p (rtx x) + { +- if (GET_MODE (x) == VOIDmode) ++ /* 0.0 in Decimal Floating Point cannot be represented by #0 or ++ zr as our callers expect, so no need to check the actual ++ value if X is of Decimal Floating Point type. */ ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT) + return false; + + if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x))) +@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) + else + return false; + +- /* use a 64 bit mode for everything except for DI/DF mode, where we use ++ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64; + +@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size, + if (IN_RANGE (offset, -256, 0)) + return 0; + +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (offset + 0x100) & ~0x1ff; + + /* Use 12-bit offset by access size. */ +@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
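As background, the SD/DD/TD modes wired up by this patch are the machine modes of the C decimal floating types. A small sketch of what such code looks like (assumes a toolchain built with --enable-decimal-float; the function and variable names are illustrative):

  /* The three DFP scalar modes handled by the patch map to C types:
     SDmode = _Decimal32, DDmode = _Decimal64, TDmode = _Decimal128.  */
  _Decimal64
  add_tax (_Decimal64 price)
  {
    /* 0.05DD is exact in decimal, unlike the binary double 0.05,
       which is the usual reason to reach for DFP.  */
    return price + price * 0.05DD;
  }

  _Decimal128
  widen (_Decimal32 x)
  {
    return x;   /* SDmode -> TDmode conversion */
  }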
View file
_service:tar_scm:0156-LoongArch-Add-descriptions-of-the-compilation-option.patch
Added
@@ -0,0 +1,83 @@
+From 899f1f351ddc0d76bc9d432cfe63b30cfb294860 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 25 Oct 2024 06:22:11 +0000
+Subject: [PATCH 156/188] LoongArch: Add descriptions of the compilation
+ options.
+
+Add descriptions for the compilation options '-mfrecipe' '-mdiv32'
+'-mlam-bh' '-mlamcas' and '-mld-seq-sa'.
+
+gcc/ChangeLog:
+
+	* doc/invoke.texi: Add descriptions for the compilation
+	options.
+---
+ gcc/doc/invoke.texi | 45 +++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 5c6515cb1..7f24fe1e2 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1008,8 +1008,9 @@ Objective-C and Objective-C++ Dialects}.
+ -mmax-inline-memcpy-size=@var{n} @gol
+ -mexplicit-relocs -mno-explicit-relocs @gol
+ -mdirect-extern-access -mno-direct-extern-access @gol
+--mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} @gol
+--mrecip -mrecip=@var{opt}
++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol
++-mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol
++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa}
+
+ @emph{M32R/D Options}
+ @gccoptlist{-m32r2 -m32rx -m32r @gol
+@@ -24686,6 +24687,46 @@ Enable the approximation for vectorized reciprocal square root.
+ So, for example, @option{-mrecip=all,!sqrt} enables
+ all of the reciprocal approximations, except for scalar square root.
+
++@opindex mfrecipe
++@opindex mno-frecipe
++@item -mfrecipe
++@itemx -mno-frecipe
++Use (do not use) @code{frecipe.@{s/d@}} and @code{frsqrte.@{s/d@}}
++instructions.  When built with @option{-march=la664}, it is enabled by default.
++The default is @option{-mno-frecipe}.
++
++@opindex mdiv32
++@opindex mno-div32
++@item -mdiv32
++@itemx -mno-div32
++Use (do not use) @code{div.wu} and @code{mod.wu} instructions with input
++not sign-extended.  When built with @option{-march=la664}, it is enabled by
++default.  The default is @option{-mno-div32}.
++
++@opindex mlam-bh
++@opindex mno-lam-bh
++@item -mlam-bh
++@itemx -mno-lam-bh
++Use (do not use) @code{am@{swap/add@}_db.@{b/h@}} instructions.  When built
++with @option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lam-bh}.
++
++@opindex mlamcas
++@opindex mno-lamcas
++@item -mlamcas
++@itemx -mno-lamcas
++Use (do not use) @code{amcas_db.@{b/h/w/d@}} instructions.  When built with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lamcas}.
++
++@opindex mld-seq-sa
++@opindex mno-ld-seq-sa
++@item -mld-seq-sa
++@itemx -mno-ld-seq-sa
++Whether a load-load barrier (@code{dbar 0x700}) is needed.  When built with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-ld-seq-sa}, i.e. the load-load barrier is needed.
++
+ @item loongarch-vect-unroll-limit
+ The vectorizer will use available tuning information to determine whether it
+ would be beneficial to unroll the main vectorized loop and by how much.  This
+--
+2.43.0
+
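In practice the documented options compose like this (foo.c is a placeholder file name; per the texi text above, -march=la664 turns all five options on by default):

  gcc -O2 -march=la464 -mfrecipe -mrecip=all foo.c    # opt in explicitly
  gcc -O2 -march=la664 foo.c                          # la664 default: all five enabled
  gcc -O2 -march=la664 -mno-ld-seq-sa foo.c           # opt back out of one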
View file
_service:tar_scm:0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
Added
@@ -0,0 +1,1824 @@ +From 737d2a5f1c5e725b7e5a20075270016ebf56b44c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 13 Sep 2022 09:28:49 +0100 +Subject: PATCH 058/157 BackportSME aarch64: Vector move fixes for + +nosimd + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=721c0fb3aca31d3bf8ad6e929eab32e29a427e60 + +This patch fixes various issues around the handling of vectors +and (particularly) vector structures with +nosimd. Previously, +passing and returning structures would trigger an ICE, since: + +* we didn't allow the structure modes to be stored in FPRs + +* we didn't provide +nosimd move patterns + +* splitting the moves into word-sized pieces (the default + strategy without move patterns) doesn't work because the + registers are doubleword sized. + +The patch is a bit of a hodge-podge since a lot of the handling of +moves, register costs, and register legitimacy is so interconnected. +It didn't seem feasible to split things further. + +Some notes: + +* The patch recognises vector and tuple modes based on TARGET_FLOAT + rather than TARGET_SIMD, and instead adds TARGET_SIMD to places + that really do need the vector ISA. This is necessary for the + modes to be handled correctly in register arguments and returns. + +* The 64-bit (DREG) STP peephole required TARGET_SIMD but the + LDP peephole didn't. I think the LDP one is right, since + DREG moves could involve GPRs as well as FPRs. + +* The patch keeps the existing choices of instructions for + TARGET_SIMD, just in case they happen to be better than FMOV + on some uarches. + +* Before the patch, +nosimd Q<->Q moves of 128-bit scalars went via + a GPR, thanks to a secondary reload pattern. This approach might + not be ideal, but there's no reason that 128-bit vectors should + behave differently from 128-bit scalars. The patch therefore + extends the current scalar approach to vectors. + +* Multi-vector LD1 and ST1 require TARGET_SIMD, so the TARGET_FLOAT + structure moves need to use LDP/STP and LDR/STR combinations + instead. That's also what we do for big-endian even with + TARGET_SIMD, so most of the code was already there. The patterns + for structures of 64-bit vectors are identical, but the patterns + for structures of 128-bit vectors need to cope with the lack of + 128-bit Q<->Q moves. + + It isn't feasible to move multi-vector tuples via GPRs, so the + patch moves them via memory instead. This contaminates the port + with its first secondary memory reload. + +gcc/ + + * config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Use + TARGET_FLOAT instead of TARGET_SIMD. + (aarch64_vectorize_related_mode): Restrict ADVSIMD handling to + TARGET_SIMD. + (aarch64_hard_regno_mode_ok): Don't allow tuples of 2 64-bit vectors + in GPRs. + (aarch64_classify_address): Treat little-endian structure moves + like big-endian for TARGET_FLOAT && !TARGET_SIMD. + (aarch64_secondary_memory_needed): New function. + (aarch64_secondary_reload): Handle 128-bit Advanced SIMD vectors + in the same way as TF, TI and TD. + (aarch64_rtx_mult_cost): Restrict ADVSIMD handling to TARGET_SIMD. + (aarch64_rtx_costs): Likewise. + (aarch64_register_move_cost): Treat a pair of 64-bit vectors + separately from a single 128-bit vector. Handle the cost implied + by aarch64_secondary_memory_needed. + (aarch64_simd_valid_immediate): Restrict ADVSIMD handling to + TARGET_SIMD. + (aarch64_expand_vec_perm_const_1): Likewise. + (TARGET_SECONDARY_MEMORY_NEEDED): New macro. 
+ * config/aarch64/iterators.md (VTX): New iterator. + * config/aarch64/aarch64.md (arches): Add fp_q as a synonym of simd. + (arch_enabled): Adjust accordingly. + (@aarch64_reload_mov<TX:mode>): Extend to... + (@aarch64_reload_mov<VTX:mode>): ...this. + * config/aarch64/aarch64-simd.md (mov<mode>): Require TARGET_FLOAT + rather than TARGET_SIMD. + (movmisalign<mode>): Likewise. + (load_pair<DREG:mode><DREG2:mode>): Likewise. + (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. + (load_pair<VQ:mode><VQ2:mode>): Likewise. + (vec_store_pair<VQ:mode><VQ2:mode>): Likewise. + (@aarch64_split_simd_mov<mode>): Likewise. + (aarch64_get_low<mode>): Likewise. + (aarch64_get_high<mode>): Likewise. + (aarch64_get_half<mode>): Likewise. Canonicalize to a move for + lowpart extracts. + (*aarch64_simd_mov<VDMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Use different w<-w and r<-w instructions for + !TARGET_SIMD. Disable immediate moves for !TARGET_SIMD but + add an alternative specifically for w<-Z. + (*aarch64_simd_mov<VQMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Likewise for the associated define_splits. Disable + FPR moves and immediate moves for !TARGET_SIMD but add an alternative + specifically for w<-Z. + (aarch64_simd_mov_from_<mode>high): Require TARGET_FLOAT rather than + TARGET_SIMD. Restrict the existing alternatives to TARGET_SIMD + but add a new r<-w one for !TARGET_SIMD. + (*aarch64_get_high<mode>): New pattern. + (load_pair_lanes<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + (store_pair_lanes<mode>): Likewise. + (*aarch64_combine_internal<mode>): Likewise. Restrict existing + w<-w, w<-r and w<-m alternatives to TARGET_SIMD but add a new w<-r + alternative for !TARGET_SIMD. + (*aarch64_combine_internal_be<mode>): Likewise. + (aarch64_combinez<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + Remove bogus arch attribute. + (*aarch64_combinez_be<mode>): Likewise. + (@aarch64_vec_concat<mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_combine<mode>): Likewise. + (aarch64_rev_reglist<mode>): Likewise. + (mov<mode>): Likewise. + (*aarch64_be_mov<VSTRUCT_2D:mode>): Extend to TARGET_FLOAT && + !TARGET_SIMD, regardless of endianness. Extend associated + define_splits in the same way, both for this pattern and the + ones below. + (*aarch64_be_mov<VSTRUCT_2Qmode>): Likewise. Restrict w<-w + alternative to TARGET_SIMD. + (*aarch64_be_movoi): Likewise. + (*aarch64_be_movci): Likewise. + (*aarch64_be_movxi): Likewise. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Extend to TARGET_FLOAT + && !TARGET_SIMD, regardless of endianness. Restrict w<-w alternative + to TARGET_SIMD for tuples of 128-bit vectors. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Likewise. + * config/aarch64/aarch64-ldpstp.md: Remove TARGET_SIMD condition + from DREG STP peephole. Change TARGET_SIMD to TARGET_FLOAT in + the VQ and VP_2E LDP and STP peepholes. + +gcc/testsuite/ + * gcc.target/aarch64/ldp_stp_20.c: New test. + * gcc.target/aarch64/ldp_stp_21.c: Likewise. + * gcc.target/aarch64/ldp_stp_22.c: Likewise. + * gcc.target/aarch64/ldp_stp_23.c: Likewise. + * gcc.target/aarch64/ldp_stp_24.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c (gpr_to_gpr): New function. + * gcc.target/aarch64/movv8qi_1.c (gpr_to_gpr): Likewise. + * gcc.target/aarch64/movv16qi_2.c: New test. + * gcc.target/aarch64/movv16qi_3.c: Likewise. + * gcc.target/aarch64/movv2di_1.c: Likewise. + * gcc.target/aarch64/movv2x16qi_1.c: Likewise. + * gcc.target/aarch64/movv2x8qi_1.c: Likewise. 
+ * gcc.target/aarch64/movv3x16qi_1.c: Likewise. + * gcc.target/aarch64/movv3x8qi_1.c: Likewise. + * gcc.target/aarch64/movv4x16qi_1.c: Likewise. + * gcc.target/aarch64/movv4x8qi_1.c: Likewise. + * gcc.target/aarch64/movv8qi_2.c: Likewise. + * gcc.target/aarch64/movv8qi_3.c: Likewise. + * gcc.target/aarch64/vect_unary_2.c: Likewise. +--- + gcc/config/aarch64/aarch64-ldpstp.md | 11 +- + gcc/config/aarch64/aarch64-simd.md | 199 +++++++++++------- + gcc/config/aarch64/aarch64.cc | 94 ++++++--- + gcc/config/aarch64/aarch64.md | 11 +- + gcc/config/aarch64/iterators.md | 2 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c | 13 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 21 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv16qi_3.c | 30 +++ + gcc/testsuite/gcc.target/aarch64/movv2di_1.c | 103 +++++++++ + .../gcc.target/aarch64/movv2x16qi_1.c | 40 ++++ + .../gcc.target/aarch64/movv2x8qi_1.c | 38 ++++ + .../gcc.target/aarch64/movv3x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv3x8qi_1.c | 41 ++++ + .../gcc.target/aarch64/movv4x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv4x8qi_1.c | 42 ++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 15 ++ + gcc/testsuite/gcc.target/aarch64/movv8qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv8qi_3.c | 30 +++ + .../gcc.target/aarch64/vect_unary_2.c | 5 + + 24 files changed, 774 insertions(+), 109 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2di_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
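Since the diff body above is truncated, here is a hypothetical reproducer in the spirit of the new movv16qi/movv2x16qi tests, compiled with something like -march=armv8-a+nosimd. The function and struct names are illustrative only; the committed tests use the ACLE vector-tuple types rather than a plain struct, so treat this as a sketch of the problem, not the testsuite source.

/* A 128-bit generic vector: with this patch it is still passed and
   returned in an FPR, but moved with FMOV/GPR-pair/memory sequences
   instead of Advanced SIMD instructions.  */
typedef unsigned char v16qi __attribute__ ((vector_size (16)));

v16qi
copy_one (v16qi v)
{
  return v;
}

/* Structures of vectors were the ICE case described above: their
   modes were not allowed in FPRs and had no +nosimd move patterns,
   and the default word-sized splitting cannot cope with doubleword
   registers.  Multi-vector LD1/ST1 need TARGET_SIMD, so the new
   patterns fall back to LDP/STP and LDR/STR combinations.  */
struct two_vectors { v16qi a, b; };

struct two_vectors
copy_two (struct two_vectors v)
{
  return v;
}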
View file
_service:tar_scm:0157-LoongArch-Split-loongarch_option_override_internal-i.patch
Added
@@ -0,0 +1,800 @@ +From 6dd3434f004dd1481a3d18fb416b3ddd4151b10f Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Sat, 30 Mar 2024 16:43:14 +0800 +Subject: PATCH 157/188 LoongArch: Split loongarch_option_override_internal + into smaller procedures + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-recip as + aliases to -mrecip={all,none}, respectively. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h (ABI_FPU_64): Rename to... + (ABI_FPU64_P): ...this. + (ABI_FPU_32): Rename to... + (ABI_FPU32_P): ...this. + (ABI_FPU_NONE): Rename to... + (ABI_NOFPU_P): ...this. + (ABI_LP64_P): Define. + * config/loongarch/loongarch.cc (loongarch_init_print_operand_punct): + Merged into loongarch_global_init. + (loongarch_cpu_option_override): Renamed to + loongarch_target_option_override. + (loongarch_option_override_internal): Move the work after + loongarch_config_target into loongarch_target_option_override. + (loongarch_global_init): Define. + (INIT_TARGET_FLAG): Move to loongarch-opts.cc. + (loongarch_option_override): Call loongarch_global_init + separately. + * config/loongarch/loongarch-opts.cc (loongarch_parse_mrecip_scheme): + Split the parsing of -mrecip=<string> from + loongarch_option_override_internal. + (loongarch_generate_mrecip_scheme): Define. Split from + loongarch_option_override_internal. + (loongarch_target_option_override): Define. Renamed from + loongarch_cpu_option_override. + (loongarch_init_misc_options): Define. Split from + loongarch_option_override_internal. + (INIT_TARGET_FLAG): Move from loongarch.cc. + * config/loongarch/loongarch-opts.h (loongarch_target_option_override): + New prototype. + (loongarch_parse_mrecip_scheme): New prototype. + (loongarch_init_misc_options): New prototype. + (TARGET_ABI_LP64): Simplify with ABI_LP64_P. + * config/loongarch/loongarch.h (TARGET_RECIP_DIV): Simplify. + Do not reference specific CPU architecture (LA664). + (TARGET_RECIP_SQRT): Same. + (TARGET_RECIP_RSQRT): Same. + (TARGET_RECIP_VEC_DIV): Same. + (TARGET_RECIP_VEC_SQRT): Same. + (TARGET_RECIP_VEC_RSQRT): Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 8 +- + gcc/config/loongarch/loongarch-def.h | 11 +- + gcc/config/loongarch/loongarch-opts.cc | 253 ++++++++++++++++++ + gcc/config/loongarch/loongarch-opts.h | 27 +- + gcc/config/loongarch/loongarch.cc | 253 +++--------------- + gcc/config/loongarch/loongarch.h | 18 +- + gcc/config/loongarch/loongarch.opt | 8 +- + 7 files changed, 342 insertions(+), 236 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 4d6b1902d..9c6f59bb8 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -197,14 +197,14 @@ mexplicit-relocs + Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + +-mrecip +-Target RejectNegative Var(la_recip) Save +-Generate approximate reciprocal divide and square root for better throughput. +- + mrecip= + Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + ++mrecip ++Target Alias(mrecip=, all, none) ++Generate approximate reciprocal divide and square root for better throughput. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index fdcf43fc7..b1423bcfe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -90,11 +90,16 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + + #define TO_LP64_ABI_BASE(C) (C) + +-#define ABI_FPU_64(abi_base) \ ++#define ABI_LP64_P(abi_base) \ ++ (abi_base == ABI_BASE_LP64D \ ++ || abi_base == ABI_BASE_LP64F \ ++ || abi_base == ABI_BASE_LP64S) ++ ++#define ABI_FPU64_P(abi_base) \ + (abi_base == ABI_BASE_LP64D) +-#define ABI_FPU_32(abi_base) \ ++#define ABI_FPU32_P(abi_base) \ + (abi_base == ABI_BASE_LP64F) +-#define ABI_FPU_NONE(abi_base) \ ++#define ABI_NOFPU_P(abi_base) \ + (abi_base == ABI_BASE_LP64S) + + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index bdecfaf49..404642a9e 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see + #include "coretypes.h" + #include "tm.h" + #include "obstack.h" ++#include "opts.h" + #include "diagnostic-core.h" + + #include "loongarch-cpu.h" +@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-str.h" + #include "loongarch-def.h" + ++/* Target configuration */ + struct loongarch_target la_target; + ++/* RTL cost information */ ++const struct loongarch_rtx_cost_data *loongarch_cost; ++ + /* ABI-related configuration. */ + #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) + static const struct loongarch_abi +@@ -795,3 +800,251 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } ++ ++/* -mrecip=<str> handling */ ++static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. */ ++ } ++const recip_options = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++}; ++ ++/* Parser for -mrecip=<recip_string>. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string) ++{ ++ unsigned int result_mask = RECIP_MASK_NONE; ++ ++ if (recip_string) ++ { ++ char *p = ASTRDUP (recip_string); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_optionsi.string)) ++ { ++ mask = recip_optionsi.mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ result_mask &= ~mask;
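The -mrecip= parser above is small enough to restate stand-alone. The sketch below uses illustrative R_* mask values rather than GCC's RECIP_MASK_* constants, but shows the same semantics: names are comma-separated, a leading '!' clears bits instead of setting them, and "default" acts like "all". Together with the alias change above, bare -mrecip now means -mrecip=all and -mno-recip means -mrecip=none.

#include <stdio.h>
#include <string.h>

enum { R_NONE = 0, R_DIV = 1, R_SQRT = 2, R_ALL = R_DIV | R_SQRT };

static unsigned int
parse_mrecip (char *s)
{
  unsigned int mask = R_NONE;
  for (char *q = strtok (s, ","); q != NULL; q = strtok (NULL, ","))
    {
      int invert = 0;
      if (*q == '!')
        {
          invert = 1;
          q++;
        }
      unsigned int m = R_NONE;   /* GCC errors out on unknown names.  */
      if (!strcmp (q, "all") || !strcmp (q, "default"))
        m = R_ALL;
      else if (!strcmp (q, "div"))
        m = R_DIV;
      else if (!strcmp (q, "sqrt"))
        m = R_SQRT;
      mask = invert ? (mask & ~m) : (mask | m);
    }
  return mask;
}

int
main (void)
{
  char opt[] = "all,!sqrt";
  printf ("%u\n", parse_mrecip (opt));   /* prints 1, i.e. R_DIV only */
  return 0;
}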
View file
_service:tar_scm:0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
Added
@@ -0,0 +1,213 @@ +From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Wed, 31 May 2023 11:23:23 +0100 +Subject: PATCH 059/157 BackportSME aarch64: Simplify output template + emission code for a few patterns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750 + +If the output code for a define_insn just does a switch (which_alternative) with no other computation we can almost always +replace it with more compact MD syntax for each alternative in a mult-alternative '@' block. +This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise. +No behavioural change intended. + +Bootstrapped and tested on aarch64-none-linux-gnu. + +gcc/ChangeLog: + + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Rewrite + output template to avoid explicit switch on which_alternative. + (*aarch64_simd_mov<VQMOV:mode>): Likewise. + (and<mode>3): Likewise. + (ior<mode>3): Likewise. + * config/aarch64/aarch64.md (*mov<mode>_aarch64): Likewise. +--- + gcc/config/aarch64/aarch64-simd.md | 97 +++++++++--------------------- + gcc/config/aarch64/aarch64.md | 42 ++++--------- + 2 files changed, 40 insertions(+), 99 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index ef7fc4ecb..2d688edf5 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -122,28 +122,16 @@ + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: return "ldr\t%d0, %1"; +- case 1: return "str\txzr, %0"; +- case 2: return "str\t%d1, %0"; +- case 3: +- if (TARGET_SIMD) +- return "mov\t%0.<Vbtype>, %1.<Vbtype>"; +- return "fmov\t%d0, %d1"; +- case 4: +- if (TARGET_SIMD) +- return "umov\t%0, %1.d0"; +- return "fmov\t%x0, %d1"; +- case 5: return "fmov\t%d0, %1"; +- case 6: return "mov\t%0, %1"; +- case 7: +- return aarch64_output_simd_mov_immediate (operands1, 64); +- case 8: return "fmov\t%d0, xzr"; +- default: gcc_unreachable (); +- } +-} ++ "@ ++ ldr\t%d0, %1 ++ str\txzr, %0 ++ str\t%d1, %0 ++ * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; ++ * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; ++ fmov\t%d0, %1 ++ mov\t%0, %1 ++ * return aarch64_output_simd_mov_immediate (operands1, 64); ++ fmov\t%d0, xzr" + (set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ + neon_logic<q>, neon_to_gp<q>, f_mcr,\ + mov_reg, neon_move<q>, f_mcr") +@@ -158,29 +146,16 @@ + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: +- return "ldr\t%q0, %1"; +- case 1: +- return "stp\txzr, xzr, %0"; +- case 2: +- return "str\t%q1, %0"; +- case 3: +- return "mov\t%0.<Vbtype>, %1.<Vbtype>"; +- case 4: +- case 5: +- case 6: +- return "#"; +- case 7: +- return aarch64_output_simd_mov_immediate (operands1, 128); +- case 8: +- return "fmov\t%d0, xzr"; +- default: +- gcc_unreachable (); +- } +-} ++ "@ ++ ldr\t%q0, %1 ++ stp\txzr, xzr, %0 ++ str\t%q1, %0 ++ mov\t%0.<Vbtype>, %1.<Vbtype> ++ # ++ # ++ # ++ * return aarch64_output_simd_mov_immediate (operands1, 128); ++ fmov\t%d0, xzr" + (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ + neon_logic<q>, multiple, multiple,\ + multiple, neon_move<q>, fmov") +@@ -1004,18 +979,10 @@ + (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db"))) + "TARGET_SIMD" +- { +- switch (which_alternative) +- { +- case 0: +- return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; +- case 1: +- return aarch64_output_simd_mov_immediate (operands2, <bitsize>, +- AARCH64_CHECK_BIC); +- default: +- gcc_unreachable (); +- } +- } ++ "@ ++ and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> ++ * return aarch64_output_simd_mov_immediate (operands2, <bitsize>,\ ++ AARCH64_CHECK_BIC);" + (set_attr "type" "neon_logic<q>") + ) + +@@ -1025,18 +992,10 @@ + (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do"))) + "TARGET_SIMD" +- { +- switch (which_alternative) +- { +- case 0: +- return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; +- case 1: +- return aarch64_output_simd_mov_immediate (operands2, <bitsize>, +- AARCH64_CHECK_ORR); +- default: +- gcc_unreachable (); +- } +- } ++ "@ ++ orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> ++ * return aarch64_output_simd_mov_immediate (operands2, <bitsize>,\ ++ AARCH64_CHECK_ORR);" + (set_attr "type" "neon_logic<q>") + ) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index c0cc91756..7454a5c77 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1198,36 +1198,18 @@ + (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: +- return "mov\t%w0, %w1"; +- case 1: +- return "mov\t%w0, %1"; +- case 2: +- return aarch64_output_scalar_simd_mov_immediate (operands1, +- <MODE>mode); +- case 3: +- return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- case 4: +- return "ldr<size>\t%w0, %1"; +- case 5: +- return "ldr\t%<size>0, %1"; +- case 6: +- return "str<size>\t%w1, %0"; +- case 7: +- return "str\t%<size>1, %0"; +- case 8: +- return TARGET_SIMD ? "umov\t%w0, %1.<v>0" : "fmov\t%w0, %s1"; +- case 9: +- return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1"; +- case 10: +- return TARGET_SIMD ? 
"dup\t%<Vetype>0, %1.<v>0" : "fmov\t%s0, %s1"; +- default: +- gcc_unreachable (); +- } +-} ++ "@ ++ mov\t%w0, %w1 ++ mov\t%w0, %1 ++ * return aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode);
View file
_service:tar_scm:0158-LoongArch-Regenerate-loongarch.opt.urls.patch
Added
@@ -0,0 +1,117 @@ +From 90a0f195830a25e4179127c67e873c80f758f29d Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 25 Oct 2024 06:25:39 +0000 +Subject: [PATCH 158/188] LoongArch: Regenerate loongarch.opt.urls. + +Fixes: d28ea8e5a704 ("LoongArch: Split loongarch_option_override_internal + into smaller procedures") + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt.urls: Regenerate. +--- + gcc/config/loongarch/loongarch.opt.urls | 92 +++++++++++++++++++++++++ + 1 file changed, 92 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch.opt.urls + +diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls +new file mode 100644 +index 000000000..571c504e6 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch.opt.urls +@@ -0,0 +1,92 @@ ++; Autogenerated by regenerate-opt-urls.py from gcc/config/loongarch/loongarch.opt and generated HTML ++ ++mfpu= ++UrlSuffix(gcc/LoongArch-Options.html#index-mfpu-2) ++ ++msoft-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-5) ++ ++msingle-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msingle-float) ++ ++mdouble-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mdouble-float-1) ++ ++msimd= ++UrlSuffix(gcc/LoongArch-Options.html#index-msimd-1) ++ ++march= ++UrlSuffix(gcc/LoongArch-Options.html#index-march-7) ++ ++mtune= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtune-8) ++ ++mabi= ++UrlSuffix(gcc/LoongArch-Options.html#index-mabi-2) ++ ++mbranch-cost= ++UrlSuffix(gcc/LoongArch-Options.html#index-mbranch-cost-2) ++ ++mcheck-zero-division ++UrlSuffix(gcc/LoongArch-Options.html#index-mcheck-zero-division) ++ ++mcond-move-int ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-int) ++ ++mcond-move-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-float) ++ ++mmemcpy ++UrlSuffix(gcc/LoongArch-Options.html#index-mmemcpy) ++ ++mstrict-align ++UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1) ++ ++mmax-inline-memcpy-size= ++UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size) ++ ++mexplicit-relocs= ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mexplicit-relocs ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mrecip= ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mrecip ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mcmodel= ++UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1) ++ ++mdirect-extern-access ++UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access) ++ ++mrelax ++UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2) ++ ++mpass-mrelax-to-as ++UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as) ++ ++mtls-dialect= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1) ++ ++mannotate-tablejump ++UrlSuffix(gcc/LoongArch-Options.html#index-mannotate-tablejump) ++ ++mfrecipe ++UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe) ++ ++mdiv32 ++UrlSuffix(gcc/LoongArch-Options.html#index-mdiv32) ++ ++mlam-bh ++UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh) ++ ++mlamcas ++UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas) ++ ++mld-seq-sa ++UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa) ++ +-- +2.43.0 +
View file
_service:tar_scm:0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
Added
@@ -0,0 +1,631 @@ +From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wdijkstr@arm.com> +Date: Mon, 24 Oct 2022 15:14:14 +0100 +Subject: PATCH 060/157 BackportSME Improve immediate expansion + PR106583 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d + +Improve immediate expansion of immediates which can be created from a +bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency +of bitmask checks. Move various immediate handling functions together +to avoid forward declarations. + +This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%. + +gcc/ + + PR target/106583 + * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) + Add support for a bitmask immediate with 2 MOVKs. + (aarch64_check_bitmask): New function after refactorization. + (aarch64_bitmask_imm): Simplify replication of small modes. + Split function into 64-bit only version for efficiency. + (aarch64_move_imm): Move near other immediate functions. + (aarch64_uimm12_shift): Likewise. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movk_shift): Likewise. + (aarch64_replicate_bitmask_imm): Likewise. + (aarch64_and_split_imm1): Likewise. + (aarch64_and_split_imm2): Likewise. + (aarch64_and_bitmask_imm): Likewise. + (aarch64_movw_imm): Likewise. + +gcc/testsuite/ + PR target/106583 + * gcc.target/aarch64/pr106583.c: Add new test. +--- + gcc/config/aarch64/aarch64.cc | 485 +++++++++++--------- + gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++ + 2 files changed, 301 insertions(+), 225 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b4b646fa0..cf7736994 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, + static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); + static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); +-static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; +@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + factor, nelts_per_vq); + } + ++/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ ++ ++static const unsigned HOST_WIDE_INT bitmask_imm_mul = ++ { ++ 0x0000000100000001ull, ++ 0x0001000100010001ull, ++ 0x0101010101010101ull, ++ 0x1111111111111111ull, ++ 0x5555555555555555ull, ++ }; ++ ++ ++ ++/* Return true if 64-bit VAL is a valid bitmask immediate. */ ++static bool ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) ++{ ++ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one; ++ int bits; ++ ++ /* Check for a single sequence of one bits and return quickly if so. ++ The special cases of all ones and all zeroes returns false. */ ++ tmp = val + (val & -val); ++ ++ if (tmp == (tmp & -tmp)) ++ return (val + 1) > 1; ++ ++ /* Invert if the immediate doesn't start with a zero bit - this means we ++ only need to search for sequences of one bits. */ ++ if (val & 1) ++ val = ~val; ++ ++ /* Find the first set bit and set tmp to val with the first sequence of one ++ bits removed. Return success if there is a single sequence of ones. 
*/ ++ first_one = val & -val; ++ tmp = val & (val + first_one); ++ ++ if (tmp == 0) ++ return true; ++ ++ /* Find the next set bit and compute the difference in bit position. */ ++ next_one = tmp & -tmp; ++ bits = clz_hwi (first_one) - clz_hwi (next_one); ++ mask = val ^ tmp; ++ ++ /* Check the bit position difference is a power of 2, and that the first ++ sequence of one bits fits within 'bits' bits. */ ++ if ((mask >> bits) != 0 || bits != (bits & -bits)) ++ return false; ++ ++ /* Check the sequence of one bits is repeated 64/bits times. */ ++ return val == mask * bitmask_imm_mul__builtin_clz (bits) - 26; ++} ++ ++ ++/* Return true if VAL is a valid bitmask immediate for MODE. */ ++bool ++aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++{ ++ if (mode == DImode) ++ return aarch64_bitmask_imm (val_in); ++ ++ unsigned HOST_WIDE_INT val = val_in; ++ ++ if (mode == SImode) ++ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); ++ ++ /* Replicate small immediates to fit 64 bits. */ ++ int size = GET_MODE_UNIT_PRECISION (mode); ++ val &= (HOST_WIDE_INT_1U << size) - 1; ++ val *= bitmask_imm_mul__builtin_clz (size) - 26; ++ ++ return aarch64_bitmask_imm (val); ++} ++ ++ ++/* Return true if the immediate VAL can be a bitfield immediate ++ by changing the given MASK bits in VAL to zeroes, ones or bits ++ from the other half of VAL. Return the new immediate in VAL2. */ ++static inline bool ++aarch64_check_bitmask (unsigned HOST_WIDE_INT val, ++ unsigned HOST_WIDE_INT &val2, ++ unsigned HOST_WIDE_INT mask) ++{ ++ val2 = val & ~mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val = val & ~mask; ++ val2 = val | (((val >> 32) | (val << 32)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | (((val >> 16) | (val << 48)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ return false; ++} ++ ++ ++/* Return true if val is an immediate that can be loaded into a ++ register by a MOVZ instruction. */ ++static bool ++aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++{ ++ if (GET_MODE_SIZE (mode) > 4) ++ { ++ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) ++ return 1; ++ } ++ else ++ { ++ /* Ignore sign extension. */ ++ val &= (HOST_WIDE_INT) 0xffffffff; ++ } ++ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++} ++ ++ ++/* Return true if VAL is an immediate that can be loaded into a ++ register in a single instruction. */ ++bool ++aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++{ ++ scalar_int_mode int_mode; ++ if (!is_a <scalar_int_mode> (mode, &int_mode)) ++ return false; ++ ++ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) ++ return 1; ++ return aarch64_bitmask_imm (val, int_mode); ++} ++ ++ + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + scalar_int_mode mode) +@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
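The payoff is easiest to see on a concrete value. Below is a minimal stand-alone checker for AArch64 logical ("bitmask") immediates, written independently of the refactored aarch64_bitmask_imm above — same idea, slower but simpler — together with an example immediate of the kind this patch now builds in three instructions instead of four. The values and the sketched instruction sequence are illustrative, not a verified codegen dump.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if X is a non-empty, non-wrapping run of consecutive ones.  */
static bool
run_of_ones (uint64_t x)
{
  return x != 0 && (x & (x + (x & -x))) == 0;
}

/* True if VAL is a valid AArch64 bitmask immediate: a rotated run of
   ones replicated across the register at some power-of-two element
   size.  */
static bool
bitmask_imm (uint64_t val)
{
  if (val == 0 || val == ~(uint64_t) 0)
    return false;
  for (unsigned int e = 2; e <= 64; e *= 2)
    {
      uint64_t mask = e == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << e) - 1;
      uint64_t elt = val & mask;
      bool replicated = true;
      for (unsigned int i = e; i < 64; i += e)
        if (((val >> i) & mask) != elt)
          replicated = false;
      /* A wrapping run of ones has a non-wrapping run as complement.  */
      if (replicated && elt != mask
          && (run_of_ones (elt) || run_of_ones (~elt & mask)))
        return true;
    }
  return false;
}

int
main (void)
{
  /* 0b01 replicated at element size 2: a valid bitmask immediate.  */
  printf ("%d\n", bitmask_imm (0x5555555555555555ull));   /* 1 */
  /* Not a bitmask immediate itself, but buildable after this patch as
     a bitmask MOV plus two MOVKs (illustrative sequence):
       mov  x0, #0x5555555555555555
       movk x0, #0x6789
       movk x0, #0x1234, lsl #48  */
  printf ("%d\n", bitmask_imm (0x1234555555556789ull));   /* 0 */
  return 0;
}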
View file
_service:tar_scm:0159-LoongArch-Add-support-for-TLS-descriptors.patch
Added
@@ -0,0 +1,724 @@ +From 0d5ff38a94dbd655bc86e0be262458ac71726ea4 Mon Sep 17 00:00:00 2001 +From: mengqinggang <mengqinggang@loongson.cn> +Date: Tue, 2 Apr 2024 09:57:20 +0800 +Subject: PATCH 159/188 LoongArch: Add support for TLS descriptors. + +Add support for TLS descriptors on normal code model and extreme +code model. + +Normal code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r4,$r4,%desc_pc_lo12(s) + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +Extreme code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, $r12, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r12,$r0,%desc_pc_lo12(s) + lu32i.d $r12,%desc64_pc_lo20(s) + lu52i.d $r12,$r12,%desc64_pc_hi12(s) + add.d $r4,$r4,$r12 + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +The default is still traditional TLS model, but can be configured with +--with-tls={trad,desc}. The default can change to TLS descriptors once +libc and LLVM support this. + +gcc/ChangeLog: + + * config.gcc: Add --with-tls option to change TLS flavor. + * config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to + configure TLS flavor. + * config/loongarch/loongarch-def.h (struct loongarch_target): Add + tls_dialect. + * config/loongarch/loongarch-driver.cc (la_driver_init): Add tls + flavor. + * config/loongarch/loongarch-opts.cc (loongarch_init_target): Add + tls_dialect. + (loongarch_config_target): Ditto. + (loongarch_update_gcc_opt_status): Ditto. + * config/loongarch/loongarch-opts.h (loongarch_init_target): Ditto. + (TARGET_TLS_DESC): New define. + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Add TLS + DESC instructions sequence length. + (loongarch_legitimize_tls_address): New TLS DESC instruction sequence. + (loongarch_option_override_internal): Add la_opt_tls_dialect. + (loongarch_option_restore): Add la_target.tls_dialect. + * config/loongarch/loongarch.md (@got_load_tls_desc<mode>): Normal + code model for TLS DESC. + (got_load_tls_desc_off64): Extreme cmode model for TLS DESC. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Ditto. + * doc/invoke.texi: Add a description of the compilation option + '-mtls-dialect={trad,desc}'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/cmodel-extreme-1.c: Add -mtls-dialect=trad. + * gcc.target/loongarch/cmodel-extreme-2.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Ditto. + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: + Ditto. + * gcc.target/loongarch/func-call-medium-1.c: Ditto. + * gcc.target/loongarch/func-call-medium-2.c: Ditto. + * gcc.target/loongarch/func-call-medium-3.c: Ditto. + * gcc.target/loongarch/func-call-medium-4.c: Ditto. + * gcc.target/loongarch/tls-extreme-macro.c: Ditto. + * gcc.target/loongarch/tls-gd-noplt.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-auto-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: New test. 
+ +Co-authored-by: Lulu Cheng <chenglulu@loongson.cn> +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.gcc | 19 +++++- + gcc/config/loongarch/genopts/loongarch.opt.in | 14 ++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-driver.cc | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 12 +++- + gcc/config/loongarch/loongarch-opts.h | 3 + + gcc/config/loongarch/loongarch.cc | 45 ++++++++---- + gcc/config/loongarch/loongarch.md | 68 +++++++++++++++++++ + gcc/config/loongarch/loongarch.opt | 14 ++++ + gcc/doc/invoke.texi | 16 ++++- + .../gcc.target/loongarch/cmodel-extreme-1.c | 2 +- + .../gcc.target/loongarch/cmodel-extreme-2.c | 2 +- + .../explicit-relocs-auto-extreme-tls-desc.c | 10 +++ + .../loongarch/explicit-relocs-auto-tls-desc.c | 10 +++ + .../explicit-relocs-auto-tls-ld-gd.c | 2 +- + .../explicit-relocs-extreme-tls-desc.c | 16 +++++ + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 2 +- + .../loongarch/explicit-relocs-tls-desc.c | 13 ++++ + .../gcc.target/loongarch/func-call-medium-1.c | 2 +- + .../gcc.target/loongarch/func-call-medium-2.c | 2 +- + .../gcc.target/loongarch/func-call-medium-3.c | 2 +- + .../gcc.target/loongarch/func-call-medium-4.c | 2 +- + .../gcc.target/loongarch/tls-extreme-macro.c | 2 +- + .../gcc.target/loongarch/tls-gd-noplt.c | 2 +- + 24 files changed, 243 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 499b36b45..1db558d4c 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4982,7 +4982,7 @@ case "${target}" in + ;; + + loongarch*-*) +- supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib" ++ supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls" + + # Local variables + unset \ +@@ -5240,6 +5240,18 @@ case "${target}" in + with_multilib_list="${abi_base}/${abi_ext}" + fi + ++ # Handle --with-tls. ++ case "$with_tls" in ++ "" \ ++ | trad | desc) ++ # OK ++ ;; ++ *) ++ echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2 ++ exit 1 ++ ;; ++ esac ++ + # Check if the configured default ABI combination is included in + # ${with_multilib_list}. + loongarch_multilib_list_sane=no +@@ -5875,6 +5887,11 @@ case ${target} in + lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; + esac + ++ case ${with_tls} in ++ "" | trad) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_TRADITIONAL" ;; ++ desc) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_DESCRIPTORS" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 9c6f59bb8..f3d53f03c 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -245,6 +245,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. 
+ ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index b1423bcfe..2fe44da5a 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -180,6 +180,7 @@ struct loongarch_target + int cpu_arch; /* CPU_ */ + int cpu_tune; /* same */ + int cmodel; /* CMODEL_ */ ++ int tls_dialect; /* TLS_ */ + }; + + /* CPU model */ +@@ -193,6 +194,12 @@ enum { + N_TUNE_TYPES = 5
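As a usage sketch (the target triplet and exact flags are illustrative), any TLS access is enough to observe the two dialects:

/* tls.c -- compile as, e.g.:
     loongarch64-linux-gnu-gcc -O2 -fPIC -mtls-dialect=desc -S tls.c
     loongarch64-linux-gnu-gcc -O2 -fPIC -mtls-dialect=trad -S tls.c
   With =desc and -mexplicit-relocs, the access to "counter" uses the
   %desc_pc_hi20/%desc_ld/%desc_call sequence from the commit message;
   with =trad it uses the traditional GD/LD relocations.  A toolchain
   defaults to descriptors only if GCC was configured with
   --with-tls=desc.  */
__thread int counter;

int
bump (void)
{
  return ++counter;
}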
View file
_service:tar_scm:0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
Added
@@ -0,0 +1,410 @@ +From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Mon, 5 Dec 2022 10:49:25 +0000 +Subject: PATCH 061/157 BackportSME AArch64: Cleanup move immediate code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1 + +Simplify, refactor and improve various move immediate functions. +Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special +cases in aarch64_internal_mov_immediate. Add new constraint so the movdi +pattern only needs a single alternative for move immediate. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type. + (aarch64_is_mov_xn_imm): New function. + (aarch64_move_imm): Refactor, assert mode is SImode or DImode. + (aarch64_internal_mov_immediate): Assert mode is SImode or DImode. + Simplify special cases. + (aarch64_uimm12_shift): Simplify code. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movw_imm): Rename to aarch64_is_movz. + (aarch64_float_const_rtx_p): Pass either SImode or DImode to + aarch64_internal_mov_immediate. + (aarch64_rtx_costs): Likewise. + * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M' + constraints into single 'O'. + (mov<mode>_aarch64): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned. + (aarch64_bitmask_imm): Likewise. + (aarch64_uimm12_shift): Likewise. + (aarch64_is_mov_xn_imm): New prototype. + * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates, + limit 'N' to 64-bit only moves. +--- + gcc/config/aarch64/aarch64-protos.h | 7 +- + gcc/config/aarch64/aarch64.cc | 158 ++++++++++++---------------- + gcc/config/aarch64/aarch64.md | 17 ++- + gcc/config/aarch64/constraints.md | 5 + + 4 files changed, 85 insertions(+), 102 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 97984f3ab..3ff1a0163 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void); + poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); + int aarch64_get_condition_code (rtx); + bool aarch64_address_valid_for_prefetch_p (rtx, bool); +-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); ++bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode); + unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in); + unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); + bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); +@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); +-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); ++bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); + machine_mode aarch64_sve_int_mode (machine_mode); + opt_machine_mode aarch64_sve_pred_mode (unsigned int); + machine_mode aarch64_sve_pred_mode (machine_mode); +@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool); + bool aarch64_sve_float_mul_immediate_p (rtx); + bool aarch64_split_dimode_const_store (rtx, rtx); + bool aarch64_symbolic_address_p (rtx); +-bool aarch64_uimm12_shift (HOST_WIDE_INT); ++bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT); + int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); ++bool 
aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); + const char *aarch64_output_casesi (rtx *); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index cf7736994..acb659f53 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) + + /* Return true if VAL is a valid bitmask immediate for MODE. */ + bool +-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) + { + if (mode == DImode) +- return aarch64_bitmask_imm (val_in); +- +- unsigned HOST_WIDE_INT val = val_in; ++ return aarch64_bitmask_imm (val); + + if (mode == SImode) + return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); +@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val, + } + + +-/* Return true if val is an immediate that can be loaded into a +- register by a MOVZ instruction. */ +-static bool +-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++/* Return true if VAL is a valid MOVZ immediate. */ ++static inline bool ++aarch64_is_movz (unsigned HOST_WIDE_INT val) + { +- if (GET_MODE_SIZE (mode) > 4) +- { +- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) +- return 1; +- } +- else +- { +- /* Ignore sign extension. */ +- val &= (HOST_WIDE_INT) 0xffffffff; +- } +- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++ return (val >> (ctz_hwi (val) & 48)) < 65536; + } + + +-/* Return true if VAL is an immediate that can be loaded into a +- register in a single instruction. */ ++/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. */ + bool +-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val) + { +- scalar_int_mode int_mode; +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- return false; ++ return aarch64_is_movz (val) || aarch64_is_movz (~val) ++ || aarch64_bitmask_imm (val); ++} + +- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) +- return 1; +- return aarch64_bitmask_imm (val, int_mode); ++ ++/* Return true if VAL is an immediate that can be created by a single ++ MOV instruction. */ ++bool ++aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode) ++{ ++ gcc_assert (mode == SImode || mode == DImode); ++ ++ if (val < 65536) ++ return true; ++ ++ unsigned HOST_WIDE_INT mask = ++ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U; ++ ++ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask)) ++ return true; ++ ++ val = (val & mask) | ((val << 32) & ~mask); ++ return aarch64_bitmask_imm (val); + } + + + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, +- scalar_int_mode mode) ++ machine_mode mode) + { + int i; + unsigned HOST_WIDE_INT val, val2, mask; + int one_match, zero_match; + int num_insns; + ++ gcc_assert (mode == SImode || mode == DImode); ++ + val = INTVAL (imm); + + if (aarch64_move_imm (val, mode)) +@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + return 1; + } + +- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff +- (with XXXX non-zero). In that case check to see if the move can be done in +- a smaller mode. 
*/ +- val2 = val & 0xffffffff; +- if (mode == DImode +- && aarch64_move_imm (val2, SImode) +- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) +- { +- if (generate) +- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); +- +- /* Check if we have to emit a second instruction by checking to see +- if any of the upper 32 bits of the original DI mode value is set. */ +- if (val == val2) +- return 1; +- +- i = (val >> 48) ? 48 : 32; +- +- if (generate) +- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
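The refactored MOVZ test in the hunk above is compact enough to restate stand-alone. In this sketch GCC's ctz_hwi (which is defined for zero) is replaced by a plain builtin, hence the explicit guard; the helper name is mine, not GCC's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* VAL is a MOVZ immediate iff it is a 16-bit value shifted left by
   0, 16, 32 or 48; "& 48" rounds the trailing-zero count down to a
   multiple of 16.  */
static bool
is_movz (uint64_t val)
{
  unsigned int shift = val ? __builtin_ctzll (val) & 48 : 0;
  return (val >> shift) < 65536;
}

int
main (void)
{
  printf ("%d\n", is_movz (0xabcd000000000000ull));  /* 1: movz, lsl #48 */
  printf ("%d\n", is_movz (0x00000000abc00000ull));  /* 1: 0xabc0 << 16 */
  printf ("%d\n", is_movz (0x00000000abcd0001ull));  /* 0: spans chunks */
  return 0;
}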
View file
_service:tar_scm:0160-LoongArch-Fix-missing-plugin-header.patch
Added
@@ -0,0 +1,32 @@ +From bec97638d68c760f6ee4b0a86ce4f9ffe9a691b3 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 2 Apr 2024 09:20:32 +0800 +Subject: [PATCH 160/188] LoongArch: Fix missing plugin header + +gcc/ChangeLog: + + * config/loongarch/t-loongarch: Add loongarch-def-array.h + to OPTIONS_H_EXTRA. +--- + gcc/config/loongarch/t-loongarch | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 994f4d19c..488e8cff3 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h \ ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-def-array.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc +-- +2.43.0 +
View file
_service:tar_scm:0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
Added
@@ -0,0 +1,229 @@ +From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:55:28 +0100 +Subject: PATCH 062/157 BackportSME AArch64: convert some patterns to + compact MD syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113 + +Hi All, + +This converts some patterns in the AArch64 backend to use the new +compact syntax. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (arches): Add nosimd. + (*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to + compact syntax. +--- + gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------ + 1 file changed, 78 insertions(+), 83 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index ea94152bf..5d02da42f 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,7 +378,7 @@ + ;; As a convenience, "fp_q" means "fp" + the ability to move between + ;; Q registers and is equivalent to "simd". + +-(define_enum "arches" any rcpc8_4 fp fp_q simd sve fp16) ++(define_enum "arches" any rcpc8_4 fp fp_q simd nosimd sve fp16) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -409,6 +409,9 @@ + (and (eq_attr "arch" "fp_q, simd") + (match_test "TARGET_SIMD")) + ++ (and (eq_attr "arch" "nosimd") ++ (match_test "!TARGET_SIMD")) ++ + (and (eq_attr "arch" "fp16") + (match_test "TARGET_FP_F16INST")) + +@@ -1194,26 +1197,27 @@ + ) + + (define_insn "*mov<mode>_aarch64" +- (set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") +- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) ++ (set (match_operand:SHORT 0 "nonimmediate_operand") ++ (match_operand:SHORT 1 "aarch64_mov_operand")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" +- "@ +- mov\t%w0, %w1 +- mov\t%w0, %1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr<size>\t%w0, %1 +- ldr\t%<size>0, %1 +- str<size>\t%w1, %0 +- str\t%<size>1, %0 +- * return TARGET_SIMD ? \"umov\t%w0, %1.<v>0\" : \"fmov\t%w0, %s1\"; +- * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\"; +- * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>0\" : \"fmov\t%s0, %s1\";" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, +- store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") +- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*") ++ {@ cons: =0, 1; attrs: type, arch ++ r, r ; mov_reg , * mov\t%w0, %w1 ++ r, M ; mov_imm , * mov\t%w0, %1 ++ w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); ++ /* The "mov_imm" type for CNT is just a placeholder. 
*/ ++ r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, m ; load_4 , * ldr<size>\t%w0, %1 ++ w, m ; load_4 , * ldr\t%<size>0, %1 ++ m, r Z ; store_4 , * str<size>\\t%w1, %0 ++ m, w ; store_4 , * str\t%<size>1, %0 ++ r, w ; neon_to_gp<q> , simd umov\t%w0, %1.<v>0 ++ r, w ; neon_to_gp<q> , nosimd fmov\t%w0, %s1 /*foo */ ++ w, r Z ; neon_from_gp<q>, simd dup\t%0.<Vallxd>, %w1 ++ w, r Z ; neon_from_gp<q>, nosimd fmov\t%s0, %w1 ++ w, w ; neon_dup , simd dup\t%<Vetype>0, %1.<v>0 ++ w, w ; neon_dup , nosimd fmov\t%s0, %s1 ++ } + ) + + (define_expand "mov<mode>" +@@ -1250,79 +1254,70 @@ + ) + + (define_insn_and_split "*movsi_aarch64" +- (set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") +- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds")) ++ (set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" +- "@ +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %1 +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%w0, %1 +- ldr\\t%s0, %1 +- str\\t%w1, %0 +- str\\t%s1, %0 +- adrp\\t%x0, %A1\;ldr\\t%w0, %x0, %L1 +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%s0, %w1 +- fmov\\t%w0, %s1 +- fmov\\t%s0, %s1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, SImode);" ++ {@ cons: =0, 1; attrs: type, arch, length ++ r k, r ; mov_reg , * , 4 mov\t%w0, %w1 ++ r , k ; mov_reg , * , 4 ^ ++ r , M ; mov_imm , * , 4 mov\t%w0, %1 ++ r , n ; mov_imm , * ,16 # ++ /* The "mov_imm" type for CNT is just a placeholder. */ ++ r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r , m ; load_4 , * , 4 ldr\t%w0, %1 ++ w , m ; load_4 , fp , 4 ldr\t%s0, %1 ++ m , r Z; store_4 , * , 4 str\t%w1, %0 ++ m , w ; store_4 , fp , 4 str\t%s1, %0 ++ r , Usw; load_4 , * , 8 adrp\t%x0, %A1;ldr\t%w0, %x0, %L1 ++ r , Usa; adr , * , 4 adr\t%x0, %c1 ++ r , Ush; adr , * , 4 adrp\t%x0, %A1 ++ w , r Z; f_mcr , fp , 4 fmov\t%s0, %w1 ++ r , w ; f_mrc , fp , 4 fmov\t%w0, %s1 ++ w , w ; fmov , fp , 4 fmov\t%s0, %s1 ++ w , Ds ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, SImode); ++ } + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, +- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") +- ++ (const_int 0) ++ { ++ aarch64_expand_mov_immediate (operands0, operands1); ++ DONE; ++ } + ) + + (define_insn_and_split "*movdi_aarch64" +- (set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") +- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd")) ++ (set (match_operand:DI 0 "nonimmediate_operand") ++ (match_operand:DI 1 "aarch64_mov_operand")) + "(register_operand (operands0, DImode) + || aarch64_reg_or_zero (operands1, DImode))" +- "@ +- mov\\t%x0, %x1 +- mov\\t%0, %x1 +- mov\\t%x0, %1 +- * return aarch64_is_mov_xn_imm (INTVAL (operands1)) ? 
\"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%x0, %1 +- ldr\\t%d0, %1 +- str\\t%x1, %0 +- str\\t%d1, %0 +- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, %0, %L1\" : \"adrp\\t%0, %A1\;ldr\\t%0, %0, %L1\"; +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%d0, %x1 +- fmov\\t%x0, %d1 +- fmov\\t%d0, %d1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, DImode);" +- "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), DImode) +- && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNTD is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, +- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, +- fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") ++ {@ cons: =0, 1; attrs: type, arch, length ++ r, r ; mov_reg , * , 4 mov\t%x0, %x1 ++ k, r ; mov_reg , * , 4 mov\t%0, %x1
View file
_service:tar_scm:0161-LoongArch-Remove-unused-code.patch
Added
@@ -0,0 +1,344 @@ +From 47581dd6da960172bc768435400010748b3f97eb Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 3 Apr 2024 09:38:23 +0800 +Subject: PATCH 161/188 LoongArch: Remove unused code. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove unused code. + * config/loongarch/loongarch-protos.h + (loongarch_split_lsx_copy_d): Remove. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/loongarch.cc + (loongarch_split_lsx_copy_d): Ditto. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/lsx.md (lsx_vpickve2gr_du): Remove splitter. + (lsx_vpickve2gr_<lsxfmt_f>): Ditto. + (abs<mode>2): Remove expander. + (vabs<mode>2): Rename 2 abs<mode>2. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-abs.c: New test. +--- + gcc/config/loongarch/lasx.md | 12 +-- + gcc/config/loongarch/loongarch-protos.h | 3 - + gcc/config/loongarch/loongarch.cc | 76 ---------------- + gcc/config/loongarch/lsx.md | 89 ++----------------- + .../gcc.target/loongarch/vector/lsx/lsx-abs.c | 26 ++++++ + 5 files changed, 35 insertions(+), 171 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 45a0a8cc8..44a7d58ff 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -572,12 +572,7 @@ + (match_operand 3 "const_<bitmask256>_operand" ""))) + "ISA_HAS_LASX" + { +-#if 0 +- if (!TARGET_64BIT && (<MODE>mode == V4DImode || <MODE>mode == V4DFmode)) +- return "#"; +- else +-#endif +- return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; ++ return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) +@@ -1446,10 +1441,7 @@ + if (which_alternative == 1) + return "xvldi.b\t%u0,0" ; + +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; ++ return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; + } + (set_attr "type" "simd_fill") + (set_attr "mode" "<MODE>") +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 8523da8d6..0c31a74b7 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -89,9 +89,6 @@ extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); + extern bool loongarch_split_256bit_move_p (rtx, rtx); +-extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); +-extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); +-extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e27335b3c..8d8a50b70 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4772,82 +4772,6 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + } + +- +-/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function +- used to generate subregs. 
*/ +- +-void +-loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index, +- rtx (*gen_fn)(rtx, rtx, rtx)) +-{ +- gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode) +- || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (dest, false); +- rtx high = loongarch_subword (dest, true); +- rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- +- emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2))); +- emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); +-} +- +-/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2. */ +- +-void +-loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2) +-{ +- int i; +- gcc_assert (GET_MODE (dest) == GET_MODE (src1)); +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src2) == DImode || src2 == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (src2, false); +- rtx high = loongarch_subword (src2, true); +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0); +- i = exact_log2 (INTVAL (index)); +- gcc_assert (i != -1); +- +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1, +- GEN_INT (1 << (i * 2)))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, +- GEN_INT (1 << (i * 2 + 1)))); +-} +- +-/* Split FILL.D. */ +- +-void +-loongarch_split_lsx_fill_d (rtx dest, rtx src) +-{ +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src) == DImode || src == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low, high; +- if (src == const0_rtx) +- { +- low = src; +- high = src; +- } +- else +- { +- low = loongarch_subword (src, false); +- high = loongarch_subword (src, true); +- } +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low)); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); +-} +- +- + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. 
*/ + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index dc81093e9..2eac11473 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -582,28 +582,11 @@ + (match_operand 3 "const_<bitmask>_operand" ""))) + "ISA_HAS_LSX" + { +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; ++ return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_split +- (set (match_operand:LSX_D 0 "register_operand") +- (vec_merge:LSX_D +- (vec_duplicate:LSX_D +- (match_operand:<UNITMODE> 1 "<LSX_D:lsx_d>_operand")) +- (match_operand:LSX_D 2 "register_operand") +- (match_operand 3 "const_<bitmask>_operand"))) +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_insert_d (operands0, operands2, operands3, operands1); +- DONE; +-}) +- + (define_insn "lsx_vextrins_<lsxfmt_f>_internal" + (set (match_operand:LSX 0 "register_operand" "=f") + (vec_merge:LSX +@@ -653,70 +636,26 @@
View file
_service:tar_scm:0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
Added
@@ -0,0 +1,792 @@ +From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:18 +0000 +Subject: PATCH 063/157 BackportSME aarch64: Use SVE's RDVL instruction + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0 + +We didn't previously use SVE's RDVL instruction, since the CNT* +forms are preferred and provide most of the range. However, +there are some cases that RDVL can handle and CNT* can't, +and using RDVL-like instructions becomes important for SME. + +gcc/ + * config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p) + (aarch64_output_sve_rdvl): Declare. + * config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New + function, split out from... + (aarch64_sve_cnt_immediate_p): ...here. + (aarch64_sve_rdvl_factor_p): New function. + (aarch64_sve_rdvl_immediate_p): Likewise. + (aarch64_output_sve_rdvl): Likewise. + (aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL + for some cases. + (aarch64_expand_mov_immediate): Handle RDVL immediates. + (aarch64_mov_operand_p): Likewise. + * config/aarch64/constraints.md (Usr): New constraint. + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL + alternative. + (*movsi_aarch64, *movdi_aarch64): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output. + * gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise. + * gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used + to calculate the -17 and 17 factors. + * gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor. 
+--- + gcc/config/aarch64/aarch64-protos.h | 2 + + gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------ + gcc/config/aarch64/aarch64.md | 3 + + gcc/config/aarch64/constraints.md | 6 + + .../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++-- + .../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +- + .../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +- + .../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +- + .../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +- + .../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +- + .../gcc.target/aarch64/sve/loop_add_4.c | 6 +- + .../aarch64/sve/pcs/stack_clash_1.c | 3 +- + 14 files changed, 225 insertions(+), 123 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 3ff1a0163..14a568140 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode); + HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); + bool aarch64_sve_cnt_immediate_p (rtx); + bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); ++bool aarch64_sve_rdvl_immediate_p (rtx); + bool aarch64_sve_addvl_addpl_immediate_p (rtx); + bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); +@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *); + char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); + char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); + char *aarch64_output_sve_scalar_inc_dec (rtx); ++char *aarch64_output_sve_rdvl (rtx); + char *aarch64_output_sve_addvl_addpl (rtx); + char *aarch64_output_sve_vector_inc_dec (const char *, rtx); + char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index acb659f53..4194dfc70 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) + return -1; + } + ++/* Return true if a single CNTBHWD instruction can multiply FACTOR ++ by the number of 128-bit quadwords in an SVE vector. */ ++ ++static bool ++aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor) ++{ ++ /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ ++ return (IN_RANGE (factor, 2, 16 * 16) ++ && (factor & 1) == 0 ++ && factor <= 16 * (factor & -factor)); ++} ++ + /* Return true if we can move VALUE into a register using a single + CNTBHWD instruction. */ + +@@ -5527,11 +5539,7 @@ static bool + aarch64_sve_cnt_immediate_p (poly_int64 value) + { + HOST_WIDE_INT factor = value.coeffs0; +- /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ +- return (value.coeffs1 == factor +- && IN_RANGE (factor, 2, 16 * 16) +- && (factor & 1) == 0 +- && factor <= 16 * (factor & -factor)); ++ return value.coeffs1 == factor && aarch64_sve_cnt_factor_p (factor); + } + + /* Likewise for rtx X. */ +@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) + -offset_value.coeffs1, 0); + } + ++/* Return true if a single RDVL instruction can multiply FACTOR by the ++ number of 128-bit quadwords in an SVE vector. 
*/
++
++static bool
++aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
++{
++  return (multiple_p (factor, 16)
++	  && IN_RANGE (factor, -32 * 16, 31 * 16));
++}
++
++/* Return true if we can move VALUE into a register using a single
++   RDVL instruction.  */
++
++static bool
++aarch64_sve_rdvl_immediate_p (poly_int64 value)
++{
++  HOST_WIDE_INT factor = value.coeffs[0];
++  return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
++}
++
++/* Likewise for rtx X.  */
++
++bool
++aarch64_sve_rdvl_immediate_p (rtx x)
++{
++  poly_int64 value;
++  return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value);
++}
++
++/* Return the asm string for moving RDVL immediate OFFSET into register
++   operand 0.  */
++
++char *
++aarch64_output_sve_rdvl (rtx offset)
++{
++  static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)];
++  poly_int64 offset_value = rtx_to_poly_int64 (offset);
++  gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value));
++
++  int factor = offset_value.coeffs[1];
++  snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16);
++  return buffer;
++}
++
+ /* Return true if we can add VALUE to a register using a single ADDVL
+    or ADDPL instruction.  */
+
+@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset)
+     count += 1;
+   else if (factor != 0)
+     {
+-      factor = abs (factor);
+-      if (factor > 16 * (factor & -factor))
+-	/* Need one register for the CNT result and one for the multiplication
+-	   factor.  If necessary, the second temporary can be reused for the
+-	   constant part of the offset.  */
++      factor /= (HOST_WIDE_INT) least_bit_hwi (factor);
++      if (!IN_RANGE (factor, -32, 31))
++	/* Need one register for the CNT or RDVL result and one for the
++	   multiplication factor.  If necessary, the second temporary
++	   can be reused for the constant part of the offset.  */
+	return 2;
+-      /* Need one register for the CNT result (which might then
++      /* Need one register for the CNT or RDVL result (which might then
+	 be shifted).  */
+      count += 1;
+    }
+@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   /* Otherwise use a CNT-based sequence.  */
+   else if (factor != 0)
+     {
+-      /* Use a subtraction if we have a negative factor.  */
+-      rtx_code code = PLUS;
+-      if (factor < 0)
+-	{
+-	  factor = -factor;
+-	  code = MINUS;
+-	}
++      /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT,
++	 with negative shifts indicating a shift right.  */
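RDVL scales the vector length in bytes by a signed 6-bit immediate, which is why the predicate above accepts only factors that are multiples of 16 within [-32 * 16, 31 * 16]. The same test restated outside GCC's poly_int machinery (illustrative helper, not GCC API):

  /* FACTOR counts bytes per 128-bit quadword of vector length; RDVL can
     materialise it when FACTOR/16 fits the instruction's signed 6-bit
     multiplier.  */
  static bool
  rdvl_factor_ok (long factor)
  {
    return factor % 16 == 0 && factor / 16 >= -32 && factor / 16 <= 31;
  }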
View file
_service:tar_scm:0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch
Added
@@ -0,0 +1,135 @@ +From 7dff9d3f7fefe074e78cd7ff6529d7c1ea6cc3b1 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 2 Apr 2024 14:29:08 +0800 +Subject: PATCH 162/188 LoongArch: Set default alignment for functions jumps + and loops PR112919. + +Xi Ruoyao set the alignment rules under LA464 in commit r14-1839, +but the macro ASM_OUTPUT_ALIGN_WITH_NOP was removed in R14-4674, +which affected the alignment rules. + +So I set different aligns on LA464 and LA664 again to test the +performance of spec2006, and modify the alignment based on the test +results. + +gcc/ChangeLog: + + PR target/112919 + * config/loongarch/loongarch-def.cc (la664_align): Newly defined + function that sets alignment rules under the LA664 microarchitecture. + * config/loongarch/loongarch-opts.cc + (loongarch_target_option_override): If not optimizing for size, set + the default alignment to what the target wants. + * config/loongarch/loongarch-tune.h (struct loongarch_align): Add + new member variables jump and loop. +--- + gcc/config/loongarch/loongarch-def.cc | 11 ++++++++--- + gcc/config/loongarch/loongarch-opts.cc | 19 +++++++++++++------ + gcc/config/loongarch/loongarch-tune.h | 22 +++++++++++++++------- + 3 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 533dd0af2..a48050c5f 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -81,14 +81,19 @@ array_tune<loongarch_cache> loongarch_cpu_cache = + + static inline loongarch_align la464_align () + { +- return loongarch_align ().function_ ("32").label_ ("16"); ++ return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16"); ++} ++ ++static inline loongarch_align la664_align () ++{ ++ return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32"); + } + + array_tune<loongarch_align> loongarch_cpu_align = + array_tune<loongarch_align> () +- .set (CPU_LOONGARCH64, la464_align ()) ++ .set (CPU_LOONGARCH64, la664_align ()) + .set (CPU_LA464, la464_align ()) +- .set (CPU_LA664, la464_align ()); ++ .set (CPU_LA664, la664_align ()); + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 062d430c2..c455c5e32 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -922,13 +922,20 @@ loongarch_target_option_override (struct loongarch_target *target, + { + loongarch_update_gcc_opt_status (target, opts, opts_set); + +- /* alignments */ +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions +- = loongarch_cpu_aligntarget->cpu_tune.function; ++ /* If not optimizing for size, set the default ++ alignment to what the target wants. 
*/
++  if (!opts->x_optimize_size)
++    {
++      if (opts->x_flag_align_functions && !opts->x_str_align_functions)
++	opts->x_str_align_functions
++	  = loongarch_cpu_align[target->cpu_tune].function;
++
++      if (opts->x_flag_align_loops && !opts->x_str_align_loops)
++	opts->x_str_align_loops = loongarch_cpu_align[target->cpu_tune].loop;
+
+-  if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+-    opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
++      if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
++	opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump;
++    }
+
+   /* Set up parameters to be used in prefetching algorithm.  */
+   int simultaneous_prefetches
+diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
+index 26f163f0a..d286eee0b 100644
+--- a/gcc/config/loongarch/loongarch-tune.h
++++ b/gcc/config/loongarch/loongarch-tune.h
+@@ -162,14 +162,16 @@ struct loongarch_cache {
+   }
+ };
+
+-/* Alignment for functions and labels for best performance.  For new uarchs
+-   the value should be measured via benchmarking.  See the documentation for
+-   -falign-functions and -falign-labels in invoke.texi for the format.  */
++/* Alignment for functions loops and jumps for best performance.  For new
++   uarchs the value should be measured via benchmarking.  See the
++   documentation for -falign-functions, -falign-loops, and -falign-jumps in
++   invoke.texi for the format.  */
+ struct loongarch_align {
+   const char *function;  /* default value for -falign-functions */
+-  const char *label;     /* default value for -falign-labels */
++  const char *loop;      /* default value for -falign-loops */
++  const char *jump;      /* default value for -falign-jumps */
+
+-  loongarch_align () : function (nullptr), label (nullptr) {}
++  loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {}
+
+   loongarch_align function_ (const char *_function)
+   {
+@@ -177,9 +179,15 @@ struct loongarch_align {
+     return *this;
+   }
+
+-  loongarch_align label_ (const char *_label)
++  loongarch_align loop_ (const char *_loop)
+   {
+-    label = _label;
++    loop = _loop;
++    return *this;
++  }
++
++  loongarch_align jump_ (const char *_jump)
++  {
++    jump = _jump;
+     return *this;
+   }
+ };
+--
+2.43.0
+
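The la464_align/la664_align helpers above use a chained-setter idiom: each setter assigns one field and returns the object, so a per-uarch default reads as a single expression. A minimal reproduction of the idiom (illustrative, simplified from loongarch_align):

  struct align_opts
  {
    const char *function = nullptr, *loop = nullptr, *jump = nullptr;
    align_opts function_ (const char *v) { function = v; return *this; }
    align_opts loop_ (const char *v) { loop = v; return *this; }
    align_opts jump_ (const char *v) { jump = v; return *this; }
  };

  /* Reads like a keyword-argument call site:  */
  align_opts la464_like = align_opts ().function_ ("32").loop_ ("16").jump_ ("16");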
View file
_service:tar_scm:0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
Added
@@ -0,0 +1,137 @@ +From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:19 +0000 +Subject: PATCH 064/157 BackportSME aarch64: Make AARCH64_FL_SVE + requirements explicit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148 + +So far, all intrinsics covered by the aarch64-sve-builtins* +framework have (naturally enough) required at least SVE. +However, arm_sme.h defines a couple of intrinsics that can +be called by any code. It's therefore necessary to make +the implicit SVE requirement explicit. + +gcc/ + * config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove + implied requirement on SVE. + * config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE. + * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. +--- + .../aarch64/aarch64-sve-builtins-base.def | 10 +++++----- + .../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++----- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + 3 files changed, 19 insertions(+), 11 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index ffdf7cb4c..3a58f76c3 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -17,7 +17,7 @@ + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +-#define REQUIRED_EXTENSIONS 0 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE + DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) + DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) +@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none) + DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16 + DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) + DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) + DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) +@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) + DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM + DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) + DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) + DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) +@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) + DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM + DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM + DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) + DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) + DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +index 635089ffc..d5f23a887 100644 +--- 
a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +@@ -17,7 +17,7 @@ + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2 + DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none) + DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none) +@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none) + DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_AES) + DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) +@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none) + DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_BITPERM) + DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_SHA3) + DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_SM4) + DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) + DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) + #undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 7924cdf0f..dde01f676 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -525,7 +525,7 @@ static const predication_index preds_z = { PRED_z, NUM_PREDS }; + static CONSTEXPR const function_group_info function_groups = { + #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ +- REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, ++ REQUIRED_EXTENSIONS }, + #include "aarch64-sve-builtins.def" + }; + +-- +2.33.0 +
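With the implicit SVE bit no longer OR-ed in by the DEF_SVE_FUNCTION expansion, each REQUIRED_EXTENSIONS value must carry the complete mask, and availability remains a plain subset test. A sketch of that test (helper name illustrative; the real flags are the AARCH64_FL_* bits above):

  #include <cstdint>

  /* An intrinsic is available only if every bit of its REQUIRED_EXTENSIONS
     mask is enabled; dropping the implicit SVE bit from the table without
     spelling it out per entry would wrongly expose SVE intrinsics to
     non-SVE code.  */
  static bool
  intrinsic_available (uint64_t enabled, uint64_t required)
  {
    return (enabled & required) == required;
  }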
View file
_service:tar_scm:0163-LoongArch-Enable-switchable-target.patch
Added
@@ -0,0 +1,281 @@
+From 427d5f10951435241d883a13557f862683046ddd Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Mon, 8 Apr 2024 16:45:13 +0800
+Subject: [PATCH 163/188] LoongArch: Enable switchable target
+
+This patch fixes the back-end context switching in cases where functions
+should be built with their own target contexts instead of the
+global one, such as LTO linking and functions with target attributes (TBD).
+
+	PR target/113233
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_reg_init):
+	Reinitialize the loongarch_regno_mode_ok cache.
+	(loongarch_option_override): Same.
+	(loongarch_save_restore_target_globals): Restore target globals.
+	(loongarch_set_current_function): Restore the target contexts
+	for functions.
+	(TARGET_SET_CURRENT_FUNCTION): Define.
+	* config/loongarch/loongarch.h (SWITCHABLE_TARGET): Enable
+	switchable target context.
+	* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
+	Initialize all builtin functions at startup.
+	(loongarch_expand_builtin): Turn assertion of builtin availability
+	into a test.
+
+gcc/testsuite/ChangeLog:
+
+	* lib/target-supports.exp: Define condition loongarch_sx_as.
+	* gcc.dg/lto/pr113233_0.c: New test.
+---
+ gcc/config/loongarch/loongarch-builtins.cc | 25 +++---
+ gcc/config/loongarch/loongarch.cc          | 91 ++++++++++++++++++++--
+ gcc/config/loongarch/loongarch.h           |  2 +
+ gcc/testsuite/gcc.dg/lto/pr113233_0.c      | 14 ++++
+ gcc/testsuite/lib/target-supports.exp      | 12 +++
+ 5 files changed, 127 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/pr113233_0.c
+
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index e3b4dbc52..51abba007 100644
+--- a/gcc/config/loongarch/loongarch-builtins.cc
++++ b/gcc/config/loongarch/loongarch-builtins.cc
+@@ -2507,14 +2507,11 @@ loongarch_init_builtins (void)
+   for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
+     {
+       d = &loongarch_builtins[i];
+-      if (d->avail ())
+-	{
+-	  type = loongarch_build_function_type (d->function_type);
+-	  loongarch_builtin_decls[i]
+-	    = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
+-				    NULL);
+-	  loongarch_get_builtin_decl_index[d->icode] = i;
+-	}
++      type = loongarch_build_function_type (d->function_type);
++      loongarch_builtin_decls[i]
++	= add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
++				NULL);
++      loongarch_get_builtin_decl_index[d->icode] = i;
+     }
+ }
+
+@@ -3100,15 +3097,21 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+			  int ignore ATTRIBUTE_UNUSED)
+ {
+   tree fndecl;
+-  unsigned int fcode, avail;
++  unsigned int fcode;
+   const struct loongarch_builtin_description *d;
+
+   fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+   fcode = DECL_MD_FUNCTION_CODE (fndecl);
+   gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins));
+   d = &loongarch_builtins[fcode];
+-  avail = d->avail ();
+-  gcc_assert (avail != 0);
++
++  if (!d->avail ())
++    {
++      error_at (EXPR_LOCATION (exp),
++		"built-in function %qD is not enabled", fndecl);
++      return target;
++    }
++
+   switch (d->builtin_type)
+     {
+     case LARCH_BUILTIN_DIRECT:
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 8d8a50b70..50ab6a82a 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -7567,15 +7567,19 @@ loongarch_global_init (void)
+     loongarch_dwarf_regno[i] = INVALID_REGNUM;
+   }
+
++  /* Function to allocate machine-dependent function status.
*/ ++ init_machine_status = &loongarch_init_machine_status; ++}; ++ ++static void ++loongarch_reg_init (void) ++{ + /* Set up loongarch_hard_regno_mode_ok. */ + for (int mode = 0; mode < MAX_MACHINE_MODE; mode++) + for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + loongarch_hard_regno_mode_ok_pmoderegno + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); +- +- /* Function to allocate machine-dependent function status. */ +- init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_option_override_internal (struct loongarch_target *target, +@@ -7602,20 +7606,92 @@ loongarch_option_override_internal (struct loongarch_target *target, + + /* Override some options according to the resolved target. */ + loongarch_target_option_override (target, opts, opts_set); ++ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (opts, opts_set); ++ ++ loongarch_reg_init (); ++} ++ ++/* Remember the last target of loongarch_set_current_function. */ ++ ++static GTY(()) tree loongarch_previous_fndecl; ++ ++/* Restore or save the TREE_TARGET_GLOBALS from or to new_tree. ++ Used by loongarch_set_current_function to ++ make sure optab availability predicates are recomputed when necessary. */ ++ ++static void ++loongarch_save_restore_target_globals (tree new_tree) ++{ ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); ++} ++ ++/* Implement TARGET_SET_CURRENT_FUNCTION. */ ++ ++static void ++loongarch_set_current_function (tree fndecl) ++{ ++ if (fndecl == loongarch_previous_fndecl) ++ return; ++ ++ tree old_tree; ++ if (loongarch_previous_fndecl == NULL_TREE) ++ old_tree = target_option_current_node; ++ else if (DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl)) ++ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl); ++ else ++ old_tree = target_option_default_node; ++ ++ if (fndecl == NULL_TREE) ++ { ++ if (old_tree != target_option_current_node) ++ { ++ loongarch_previous_fndecl = NULL_TREE; ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION ++ (target_option_current_node)); ++ } ++ return; ++ } ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ if (new_tree == NULL_TREE) ++ new_tree = target_option_default_node; ++ ++ loongarch_previous_fndecl = fndecl; ++ ++ if (new_tree == old_tree) ++ return; ++ ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (new_tree)); ++ ++ loongarch_reg_init (); ++ ++ loongarch_save_restore_target_globals (new_tree); + } + ++ ++ + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + loongarch_option_override (void) + {
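loongarch_set_current_function above is essentially a memoised context switch: it remembers the last fndecl and pays for cl_target_option_restore plus the register-mode cache rebuild only when the incoming function's target node differs. The pattern in isolation (generic sketch, not the GCC API):

  /* Cache the active context; switching is expensive (standing in here
     for cl_target_option_restore + loongarch_reg_init), so skip it when
     consecutive functions share a target.  */
  template <typename Ctx>
  struct context_switcher
  {
    const Ctx *active = nullptr;

    void set (const Ctx *next, void (*apply) (const Ctx *))
    {
      if (next == active)
        return;   /* fast path: same context as last time */
      apply (next);
      active = next;
    }
  };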
View file
_service:tar_scm:0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
Added
@@ -0,0 +1,562 @@ +From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:19 +0000 +Subject: PATCH 065/157 BackportSME aarch64: Add group suffixes to SVE + intrinsics + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d + +The SME2 ACLE adds a new "group" suffix component to the naming +convention for SVE intrinsics. This is also used in the new tuple +forms of the svreinterpret intrinsics. + +This patch adds support for group suffixes and defines the +x2, x3 and x4 suffixes that are needed for the svreinterprets. + +gcc/ + * config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take + a group suffix index parameter. + (build_32_64, build_all): Update accordingly. Iterate over all + group suffixes. + * config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold) + (svqshl_impl::fold, svrshl_impl::fold): Update function_instance + constructors. + * config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array. + (groups_none): New constant. + (function_groups): Initialize the groups field. + (function_instance::hash): Hash the group index. + (function_builder::get_name): Add the group suffix. + (function_builder::add_overloaded_functions): Iterate over all + group suffixes. + (function_resolver::lookup_form): Take a group suffix parameter. + (function_resolver::resolve_to): Likewise. + * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New + macro. + (x2, x3, x4): New group suffixes. + * config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum. + (group_suffix_info): New structure. + (function_group_info::groups): New member variable. + (function_instance::group_suffix_id): Likewise. + (group_suffixes): New array. + (function_instance::operator==): Compare the group suffixes. + (function_instance::group_suffix): New function. +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------ + .../aarch64/aarch64-sve-builtins-sve2.cc | 10 +-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------ + gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++ + gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++---- + 5 files changed, 165 insertions(+), 72 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 4fa4181b9..3ecef026c 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format, + } + + /* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, +- the type suffixes at index TI and the predication suffix at index PI. +- The other arguments are as for build_all. */ ++ the type suffixes at index TI, the group suffixes at index GI, and the ++ predication suffix at index PI. The other arguments are as for ++ build_all. */ + static void + build_one (function_builder &b, const char *signature, + const function_group_info &group, mode_suffix_index mode_suffix_id, +- unsigned int ti, unsigned int pi, bool force_direct_overloads) ++ unsigned int ti, unsigned int gi, unsigned int pi, ++ bool force_direct_overloads) + { + /* Byte forms of svdupq take 16 arguments. 
*/
+  auto_vec<tree, 16> argument_types;
+  function_instance instance (group.base_name, *group.base, *group.shape,
+			      mode_suffix_id, group.types[ti],
+-			      group.preds[pi]);
++			      group.groups[gi], group.preds[pi]);
+  tree return_type = parse_signature (instance, signature, argument_types);
+  apply_predication (instance, return_type, argument_types);
+  b.add_unique_function (instance, return_type, argument_types,
+@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature,
+	     mode_suffix_index mode64, bool force_direct_overloads = false)
+ {
+   for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+-    if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+-      {
+-	gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
+-	build_one (b, signature, group, mode32, 0, pi,
+-		   force_direct_overloads);
+-	build_one (b, signature, group, mode64, 0, pi,
+-		   force_direct_overloads);
+-      }
+-    else
+-      for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++    for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++      if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+	{
+-	  unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
+-	  gcc_assert (bits == 32 || bits == 64);
+-	  mode_suffix_index mode = bits == 32 ? mode32 : mode64;
+-	  if (mode != MODE_none)
+-	    build_one (b, signature, group, mode, ti, pi,
+-		       force_direct_overloads);
++	  gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
++	  build_one (b, signature, group, mode32, 0, gi, pi,
++		     force_direct_overloads);
++	  build_one (b, signature, group, mode64, 0, gi, pi,
++		     force_direct_overloads);
+	}
++      else
++	for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
++	     ++ti)
++	  {
++	    unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
++	    gcc_assert (bits == 32 || bits == 64);
++	    mode_suffix_index mode = bits == 32 ? mode32 : mode64;
++	    if (mode != MODE_none)
++	      build_one (b, signature, group, mode, ti, gi, pi,
++			 force_direct_overloads);
++	  }
+ }
+
+ /* For every type and predicate combination in GROUP, add one function
+@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature,
+	   bool force_direct_overloads = false)
+ {
+   for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+-    for (unsigned int ti = 0;
+-	 ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+-      build_one (b, signature, group, mode_suffix_id, ti, pi,
+-		 force_direct_overloads);
++    for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++      for (unsigned int ti = 0;
++	   ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++	build_one (b, signature, group, mode_suffix_id, ti, gi, pi,
++		   force_direct_overloads);
+ }
+
+ /* TYPE is the largest type suffix associated with the arguments of R,
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+index e066f096d..a94e5e269 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+@@ -252,7 +252,7 @@ public:
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svqshl", functions::svqshl,
+				    shapes::binary_int_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	return f.redirect_call (instance);
+      }
+    else
+@@ -261,7 +261,7 @@
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svrshl", functions::svrshl,
+				    shapes::binary_int_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	return f.redirect_call (instance);
+      }
+  }
+@@ -290,7 +290,7 @@ public:
+				    -wi::to_wide (amount));
+	function_instance instance ("svasr", functions::svasr,
+				    shapes::binary_uint_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	if (f.type_suffix (0).unsigned_p)
+	  {
+	    instance.base_name = "svlsr";
+@@ -322,7 +322,7 @@ public:
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svlsl", functions::svlsl,
+				    shapes::binary_uint_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = as_a <gcall *> (f.redirect_call (instance));
+	gimple_call_set_arg (call, 2, amount);
+	return call;
+@@ -335,7 +335,7 @@ public:
+				    -wi::to_wide (amount));
+	function_instance instance ("svrshr", functions::svrshr,
+				    shapes::shift_right_imm, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = as_a <gcall *> (f.redirect_call (instance));
+	gimple_call_set_arg (call, 2, amount);
+	return call;
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index dde01f676..dc3fd80da 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
+     0, VOIDmode }
+ };
+
++CONSTEXPR const group_suffix_info group_suffixes[] = {
++#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \
++  { "_" #NAME, VG, VECTORS_PER_TUPLE },
++#include "aarch64-sve-builtins.def"
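With the group component in place, intrinsic names gain one suffix per component, which is how the tuple forms of svreinterpret mentioned in the commit message get their names from the x2/x3/x4 suffixes defined above. An illustrative sketch of the concatenation (not the GCC implementation of function_builder::get_name):

  #include <string>

  /* Suffix strings are appended in a fixed order; an empty string means
     the component is unused for this function.  */
  static std::string
  intrinsic_name (const char *base, const char *type_sfx,
                  const char *group_sfx, const char *pred_sfx)
  {
    return std::string (base) + type_sfx + group_sfx + pred_sfx;
  }

  /* intrinsic_name ("svreinterpret", "_s32", "_x2", "") yields
     "svreinterpret_s32_x2".  */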
View file
_service:tar_scm:0164-LoongArch-Define-ISA-versions.patch
Added
@@ -0,0 +1,1016 @@ +From 66c8369ff9e5987c14786692cf6fd945a94273a1 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:47 +0800 +Subject: PATCH 164/188 LoongArch: Define ISA versions + +These ISA versions are defined as -march= parameters and +are recommended for building binaries for distribution. + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Make la64v1.0 the default ISA preset of the lp64d ABI. + * config/loongarch/genopts/loongarch-strings: Define la64v1.0, la64v1.1. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-c.cc (LARCH_CPP_SET_PROCESSOR): Likewise. + (loongarch_cpu_cpp_builtins): Likewise. + * config/loongarch/loongarch-cpu.cc (get_native_prid): Likewise. + (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.cc (array_tune): Likewise. + * config/loongarch/loongarch-def.h: Likewise. + * config/loongarch/loongarch-driver.cc (driver_set_m_parm): Likewise. + (driver_get_normalized_m_opts): Likewise. + * config/loongarch/loongarch-opts.cc (default_tune_for_arch): Likewise. + (TUNE_FOR_ARCH): Likewise. + (arch_str): Likewise. + (loongarch_target_option_override): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464): Likewise. + (TARGET_uARCH_LA664): Likewise. + * config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT): Likewise. + (STR_ARCH_ABI_DEFAULT): Likewise. + (STR_TUNE_GENERIC): Likewise. + (STR_ARCH_LA64V1_0): Likewise. + (STR_ARCH_LA64V1_1): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): Likewise. + (loongarch_asm_code_end): Likewise. + * config/loongarch/loongarch.opt: Likewise. + * doc/invoke.texi: Likewise. +--- + gcc/config.gcc | 34 ++++---- + .../loongarch/genopts/loongarch-strings | 5 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 43 ++++++++-- + gcc/config/loongarch/loongarch-c.cc | 37 +++------ + gcc/config/loongarch/loongarch-cpu.cc | 35 ++++---- + gcc/config/loongarch/loongarch-def.cc | 83 +++++++++++++------ + gcc/config/loongarch/loongarch-def.h | 37 ++++++--- + gcc/config/loongarch/loongarch-driver.cc | 8 +- + gcc/config/loongarch/loongarch-opts.cc | 66 +++++++++++---- + gcc/config/loongarch/loongarch-opts.h | 4 +- + gcc/config/loongarch/loongarch-str.h | 5 +- + gcc/config/loongarch/loongarch.cc | 11 +-- + gcc/config/loongarch/loongarch.opt | 43 ++++++++-- + gcc/doc/invoke.texi | 57 ++++++++----- + 14 files changed, 300 insertions(+), 168 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 1db558d4c..c6820d0f1 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5035,7 +5035,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- "" | abi-default | loongarch64 | la4664) ;; # OK, append here. ++ "" | la64v1.01 | abi-default | loongarch64 | la4664) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 
1>&2
+@@ -5082,10 +5082,18 @@ case "${target}" in
+
+	# Infer ISA-related default options from the ABI: pass 1
+	case ${abi_base}/${abi_ext} in
+-	lp64*/base)
+	    # architectures that support lp64* ABI
+-	    arch_pattern="native|abi-default|loongarch64|la[46]64"
+-	    # default architecture for lp64* ABI
++	lp64d/base)
++	    # architectures that support lp64* ABI
++	    arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
++
++	    # default architecture for lp64d ABI
++	    arch_default="la64v1.0"
++	    ;;
++	lp64[fs]/base)
++	    # architectures that support lp64* ABI
++	    arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
++
++	    # default architecture for lp64[fs] ABI
+	    arch_default="abi-default"
+	    ;;
+	*)
+@@ -5157,15 +5165,7 @@ case "${target}" in
+
+
+	# Check default with_tune configuration using with_arch.
+-	case ${with_arch} in
+-	loongarch64)
+-	    tune_pattern="native|abi-default|loongarch64|la[46]64"
+-	    ;;
+-	*)
+-	    # By default, $with_tune == $with_arch
+-	    tune_pattern="*"
+-	    ;;
+-	esac
++	tune_pattern="native|generic|loongarch64|la[46]64"
+
+	case ${with_tune} in
+	"") ;; # OK
+@@ -5215,7 +5215,7 @@ case "${target}" in
+		# Fixed: use the default gcc configuration for all multilib
+		# builds by default.
+		with_multilib_default="" ;;
+-	arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
++	arch,native|arch,la64v1.[01]|arch,loongarch64|arch,la[46]64) # OK, append here.
+		with_multilib_default="/march=${component}" ;;
+	arch,*)
+		with_multilib_default="/march=abi-default"
+@@ -5315,7 +5315,7 @@ case "${target}" in
+		if test x${parse_state} = x"arch"; then
+			# -march option
+			case ${component} in
+-			native | abi-default | loongarch64 | la[46]64) # OK, append here.
++			native | abi-default | la64v1.[01] | loongarch64 | la[46]64) # OK, append here.
+				# Append -march spec for each multilib variant.
+				loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
+				parse_state="opts"
+@@ -5858,7 +5858,7 @@ case ${target} in
+	# See macro definitions from loongarch-opts.h and loongarch-cpu.h.
+
+	# Architecture
+-	tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(echo ${with_arch} | tr a-z- A-Z_)"
++	tm_defines="${tm_defines} DEFAULT_CPU_ARCH=ARCH_$(echo ${with_arch} | tr a-z.- A-Z__)"
+
+	# Base ABI type
+	tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(echo ${abi_base} | tr a-z- A-Z_)"
+@@ -5870,7 +5870,7 @@ case ${target} in
+
+	# Microarchitecture
+	if test x${with_tune} != x; then
+-		tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(echo ${with_tune} | tr a-z- A-Z_)"
++		tm_defines="${tm_defines} DEFAULT_CPU_TUNE=TUNE_$(echo ${with_tune} | tr a-z.- A-Z__)"
+	fi
+
+	# FPU adjustment
+diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
+index 99fd4e7cd..fd2f9b4f3 100644
+--- a/gcc/config/loongarch/genopts/loongarch-strings
++++ b/gcc/config/loongarch/genopts/loongarch-strings
+@@ -23,10 +23,13 @@ OPTSTR_ARCH	arch
+ OPTSTR_TUNE	tune
+
+ STR_CPU_NATIVE	native
+-STR_CPU_ABI_DEFAULT	abi-default
++STR_ARCH_ABI_DEFAULT	abi-default
++STR_TUNE_GENERIC	generic
+ STR_CPU_LOONGARCH64	loongarch64
+ STR_CPU_LA464	la464
+ STR_CPU_LA664	la664
++STR_ARCH_LA64V1_0	la64v1.0
++STR_ARCH_LA64V1_1	la64v1.1
+
+ # Base architecture
+ STR_ISA_BASE_LA64	la64
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index f3d53f03c..0ecd10922 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -95,30 +95,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit).
+ + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++Enum(arch_type) String(@@STR_CPU_NATIVE@@) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(@@STR_ARCH_ABI_DEFAULT@@) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(@@STR_CPU_LOONGARCH64@@) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++Enum(arch_type) String(@@STR_CPU_LA464@@) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++Enum(arch_type) String(@@STR_CPU_LA664@@) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_0@@) Value(ARCH_LA64V1_0)
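The net effect of the config.gcc hunks above: the default -march for the lp64d ABI is now the la64v1.0 preset, while lp64f/lp64s fall back to abi-default. Condensed into one illustrative helper (not part of the patch):

  #include <string.h>

  /* Mirror of the "Infer ISA-related default options from the ABI" logic:
     only lp64d gets the new versioned default.  */
  static const char *
  default_march_for_abi (const char *abi_base)
  {
    return strcmp (abi_base, "lp64d") == 0 ? "la64v1.0" : "abi-default";
  }

A distribution build would then configure with --with-arch=la64v1.0, or users would pass -march=la64v1.0 / -march=la64v1.1 directly, the presets the commit message recommends for distributable binaries.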
View file
_service:tar_scm:0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
Added
@@ -0,0 +1,230 @@
+From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:20 +0000
+Subject: [PATCH 066/157] [Backport][SME] aarch64: Add sve_type to SVE builtins
+ code
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92
+
+Until now, the SVE ACLE code had mostly been able to represent
+individual SVE arguments with just an element type suffix (s32, u32,
+etc.).  However, the SME2 ACLE provides many overloaded intrinsics
+that operate on tuples rather than single vectors.  This patch
+therefore adds a new type (sve_type) that combines an element
+type suffix with a vector count.  This is enough to uniquely
+represent all SVE ACLE types.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h (sve_type): New struct.
+	(sve_type::operator==): New function.
+	(function_resolver::get_vector_type): Delete.
+	(function_resolver::report_no_such_form): Take an sve_type rather
+	than a type_suffix_index.
+	* config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New
+	function.
+	(function_resolver::get_vector_type): Delete.
+	(function_resolver::report_no_such_form): Take an sve_type rather
+	than a type_suffix_index.
+	(find_sve_type): New function, split out from...
+	(function_resolver::infer_vector_or_tuple_type): ...here.
+---
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++----------
+ gcc/config/aarch64/aarch64-sve-builtins.h  | 37 ++++++++-
+ 2 files changed, 88 insertions(+), 42 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index dc3fd80da..cc676bfe1 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type)
+   return NUM_TYPE_SUFFIXES;
+ }
+
++/* Return the vector type associated with TYPE.  */
++static tree
++get_vector_type (sve_type type)
++{
++  auto vector_type = type_suffixes[type.type].vector_type;
++  return acle_vector_types[type.num_vectors - 1][vector_type];
++}
++
+ /* Report an error against LOCATION that the user has tried to use
+    function FNDECL when extension EXTENSION is disabled.  */
+ static void
+@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location,
+ {
+ }
+
+-/* Return the vector type associated with type suffix TYPE.  */
+-tree
+-function_resolver::get_vector_type (type_suffix_index type)
+-{
+-  return acle_vector_types[0][type_suffixes[type].vector_type];
+-}
+-
+ /* Return the <stdint.h> name associated with TYPE.  Using the <stdint.h>
+    name should be more user-friendly than the underlying canonical type,
+    since it makes the signedness and bitwidth explicit.  */
+@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i)
+	  || SCALAR_FLOAT_TYPE_P (type));
+ }
+
+-/* Report that the function has no form that takes type suffix TYPE.
++/* Report that the function has no form that takes type TYPE.
+    Return error_mark_node.
*/ + tree +-function_resolver::report_no_such_form (type_suffix_index type) ++function_resolver::report_no_such_form (sve_type type) + { + error_at (location, "%qE has no form that takes %qT arguments", + fndecl, get_vector_type (type)); +@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno, + return type; + } + ++/* If TYPE is an SVE predicate or vector type, or a tuple of such a type, ++ return the associated sve_type, otherwise return an invalid sve_type. */ ++static sve_type ++find_sve_type (const_tree type) ++{ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) ++ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) ++ { ++ vector_type_index type_i = type_suffixessuffix_i.vector_type; ++ tree this_type = acle_vector_typessize_itype_i; ++ if (this_type && matches_type_p (this_type, type)) ++ return { type_suffix_index (suffix_i), size_i + 1 }; ++ } ++ ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno, + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + +- /* A linear search should be OK here, since the code isn't hot and +- the number of types is only small. */ +- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) +- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) +- { +- vector_type_index type_i = type_suffixessuffix_i.vector_type; +- tree type = acle_vector_typessize_itype_i; +- if (type && matches_type_p (type, actual)) +- { +- if (size_i + 1 == num_vectors) +- return type_suffix_index (suffix_i); +- +- if (num_vectors == 1) +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects a single SVE vector rather than a tuple", +- actual, argno + 1, fndecl); +- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing single vector %qT to argument %d" +- " of %qE, which expects a tuple of %d vectors", +- actual, argno + 1, fndecl, num_vectors); +- else +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing %qT to argument %d of %qE, which" +- " expects a tuple of %d vectors", actual, argno + 1, +- fndecl, num_vectors); +- return NUM_TYPE_SUFFIXES; +- } +- } ++ if (auto sve_type = find_sve_type (actual)) ++ { ++ if (sve_type.num_vectors == num_vectors) ++ return sve_type.type; ++ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ actual, argno + 1, fndecl); ++ else if (sve_type.num_vectors == 1 ++ && sve_type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ actual, argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", actual, argno + 1, ++ fndecl, num_vectors); ++ return NUM_TYPE_SUFFIXES; ++ } + + if (num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 374c57e93..f4f2c415f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -263,6 +263,40 @@ struct group_suffix_info + unsigned int vectors_per_tuple; + }; + ++/* Represents an SVE vector, predicate, tuple of vectors, or tuple of ++ predicates. There is also a representation of "no type"/"invalid type". */ ++struct sve_type ++{ ++ sve_type () = default; ++ sve_type (type_suffix_index type) : type (type), num_vectors (1) {} ++ sve_type (type_suffix_index type, unsigned int num_vectors) ++ : type (type), num_vectors (num_vectors) {} ++ ++ /* Return true if the type is valid. */ ++ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; } ++ ++ bool operator== (const sve_type &) const; ++ bool operator!= (const sve_type &x) const { return !operator== (x); } ++ ++ /* This is one of: ++ ++ - TYPE_SUFFIX_b for svbool_t-based types ++ - TYPE_SUFFIX_c for svcount_t-based types ++ - the type suffix of a data element for SVE data vectors and tuples ++ - NUM_TYPE_SUFFIXES for invalid types. */ ++ type_suffix_index type = NUM_TYPE_SUFFIXES; ++
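sve_type above collapses "which element type" and "how many vectors" into one value, which is what lets resolution distinguish svint32_t from svint32x2_t. A usage sketch, assuming the header above is in scope (TYPE_SUFFIX_s32 is one of the real suffix enumerators; the snippet itself is illustrative):

  static void
  sve_type_demo ()
  {
    sve_type single (TYPE_SUFFIX_s32);    /* svint32_t:   num_vectors == 1 */
    sve_type pair (TYPE_SUFFIX_s32, 2);   /* svint32x2_t: num_vectors == 2 */
    sve_type invalid;                     /* default: NUM_TYPE_SUFFIXES    */

    /* operator bool separates "no type" from any valid type, and
       single != pair even though both share the same element suffix.  */
    if (invalid || single == pair)
      __builtin_unreachable ();
  }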
View file
_service:tar_scm:0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch
Added
@@ -0,0 +1,678 @@ +From 9af73fb7213d5c10b3683465e6682ad20f5abe64 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:48 +0800 +Subject: PATCH 165/188 LoongArch: Define builtin macros for ISA evolutions + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-evolution.o. + * config/loongarch/genopts/genstr.sh: Enable generation of + loongarch-evolution.cc,h. + * config/loongarch/t-loongarch: Likewise. + * config/loongarch/genopts/gen-evolution.awk: New file. + * config/loongarch/genopts/isa-evolution.in: Mark ISA version + of introduction for each ISA evolution feature. + * config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): + Define builtin macros for enabled ISA evolutions and the ISA + version. + * config/loongarch/loongarch-cpu.cc: Use loongarch-evolution.h. + * config/loongarch/loongarch.h: Likewise. + * config/loongarch/loongarch-cpucfg-map.h: Delete. + * config/loongarch/loongarch-evolution.cc: New file. + * config/loongarch/loongarch-evolution.h: New file. + * config/loongarch/loongarch-opts.h (ISA_HAS_FRECIPE): Define. + (ISA_HAS_DIV32): Likewise. + (ISA_HAS_LAM_BH): Likewise. + (ISA_HAS_LAMCAS): Likewise. + (ISA_HAS_LD_SEQ_SA): Likewise. +--- + gcc/config.gcc | 2 +- + .../loongarch/genopts/gen-evolution.awk | 230 ++++++++++++++++++ + gcc/config/loongarch/genopts/genstr.sh | 82 ++----- + gcc/config/loongarch/genopts/isa-evolution.in | 10 +- + gcc/config/loongarch/loongarch-c.cc | 23 ++ + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-evolution.cc | 60 +++++ + ...rch-cpucfg-map.h => loongarch-evolution.h} | 46 +++- + gcc/config/loongarch/loongarch-opts.h | 11 - + gcc/config/loongarch/loongarch.h | 1 + + gcc/config/loongarch/t-loongarch | 26 +- + 11 files changed, 398 insertions(+), 95 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/gen-evolution.awk + create mode 100644 gcc/config/loongarch/loongarch-evolution.cc + rename gcc/config/loongarch/{loongarch-cpucfg-map.h => loongarch-evolution.h} (52%) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c6820d0f1..a405e6d2e 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -458,7 +458,7 @@ loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" +- extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_options="${extra_options} g.opt fused-madd.opt" + ;; +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +new file mode 100644 +index 000000000..4d105afa9 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -0,0 +1,230 @@ ++#!/usr/bin/gawk ++# ++# A simple script that generates loongarch-evolution.h ++# from genopts/isa-evolution.in ++# ++# Copyright (C) 2021-2024 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. 
++#
++# GCC is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++# License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.
++
++BEGIN {
++    # isa_version_major[]
++    # isa_version_minor[]
++    # cpucfg_word[]
++    # cpucfg_bit_in_word[]
++    # name_capitalized[]
++    # comment[]
++}
++
++{
++    cpucfg_word[NR] = $1
++    cpucfg_bit_in_word[NR] = $2
++    name[NR] = gensub(/-/, "_", "g", $3)
++    name_capitalized[NR] = toupper(name[NR])
++    isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4)
++    isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4)
++
++    $1 = $2 = $3 = $4 = ""
++    sub (/^\s*/, "")
++    comment[NR] = $0
++}
++
++function copyright_header(from_year,to_year)
++{
++    print "   Copyright (C) " from_year "-" to_year \
++          " Free Software Foundation, Inc."
++    print ""
++    print "This file is part of GCC."
++    print ""
++    print "GCC is free software; you can redistribute it and/or modify"
++    print "it under the terms of the GNU General Public License as published by"
++    print "the Free Software Foundation; either version 3, or (at your option)"
++    print "any later version."
++    print ""
++    print "GCC is distributed in the hope that it will be useful,"
++    print "but WITHOUT ANY WARRANTY; without even the implied warranty of"
++    print "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"
++    print "GNU General Public License for more details."
++    print ""
++    print "You should have received a copy of the GNU General Public License"
++    print "along with GCC; see the file COPYING3.  If not see"
++    print "<http://www.gnu.org/licenses/>."
++}
++
++function gen_cpucfg_map()
++{
++    print "static constexpr struct {"
++    print "  int cpucfg_word;"
++    print "  unsigned int cpucfg_bit;"
++    print "  HOST_WIDE_INT isa_evolution_bit;"
++    print "} cpucfg_map[] = {"
++
++    for (i = 1; i <= NR; i++)
++        printf ("  { %d, 1u << %d, OPTION_MASK_ISA_%s },\n",
++                cpucfg_word[i], cpucfg_bit_in_word[i], name_capitalized[i])
++
++    print "};"
++}
++
++function gen_cpucfg_useful_idx()
++{
++    split("0 1 2 16 17 18 19", init_useful_idx)
++
++    delete idx_bucket
++
++    for (i in init_useful_idx)
++        idx_bucket[init_useful_idx[i]] = 1
++    delete init_useful_idx
++
++    for (i in cpucfg_word)
++        idx_bucket[cpucfg_word[i]] = 1
++
++    delete idx_list
++    for (i in idx_bucket)
++        idx_list[length(idx_list)-1] = i+0
++    delete idx_bucket
++
++    asort (idx_list)
++
++    print "static constexpr int cpucfg_useful_idx[] = {"
++    for (i in idx_list)
++        printf("  %d,\n", idx_list[i])
++    print "};"
++
++    print ""
++
++    printf ("static constexpr int N_CPUCFG_WORDS = %d;\n",
++            idx_list[length(idx_list)] + 1)
++
++    delete idx_list
++}
++
++function gen_evolution_decl()
++{
++    print "/* ISA evolution features */"
++    print "enum {"
++
++    for (i = 1; i <= NR; i++)
++        print "  EVO_" name_capitalized[i] " = " i - 1 ","
++
++    print "  N_EVO_FEATURES = " NR
++    print "};"
++    print ""
++
++    print "/* Condition macros */"
++    for (i = 1; i <= NR; i++)
++        printf ("#define ISA_HAS_%s \\\n" \
++                "  (la_target.isa.evolution & OPTION_MASK_ISA_%s)\n",
++                name_capitalized[i], name_capitalized[i])
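The generated cpucfg_map table pairs a CPUCFG word index and bit with the corresponding ISA-evolution option mask, so native CPU detection reduces to OR-ing the masks whose hardware bits are set. A sketch of that consumer loop (illustrative; only the table shape comes from the awk output above):

  struct cpucfg_map_entry
  {
    int cpucfg_word;
    unsigned int cpucfg_bit;
    long long isa_evolution_bit;
  };

  static long long
  detect_evolution (const unsigned int *cpucfg_words,
                    const cpucfg_map_entry *map, int n)
  {
    long long mask = 0;
    for (int i = 0; i < n; i++)
      if (cpucfg_words[map[i].cpucfg_word] & map[i].cpucfg_bit)
        mask |= map[i].isa_evolution_bit;  /* hardware bit set -> feature on */
    return mask;
  }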
View file
_service:tar_scm:0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
Added
@@ -0,0 +1,1474 @@ +From 21839879d5f00db48cdacd472044a9bd4e23a2c6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:20 +0000 +Subject: PATCH 067/157 BackportSME aarch64: Generalise some SVE ACLE + error messages + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb01ef94ff5096b907639aa3a1d77850921e7d37 + +The current SVE ACLE function-resolution diagnostics assume +that a function has a fixed choice between vectors or tuples +of vectors. If an argument was not an SVE type at all, the +error message said the function "expects an SVE vector type" +or "expects an SVE tuple type". + +This patch generalises the error to cope with cases where +an argument can be either a vector or a tuple. It also splits +out the diagnostics for mismatched tuple sizes, so that they +can be reused by later patches. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::infer_sve_type): New member function. + (function_resolver::report_incorrect_num_vectors): Likewise. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::infer_sve_type): New function,. + (function_resolver::report_incorrect_num_vectors): New function, + split out from... + (function_resolver::infer_vector_or_tuple_type): ...here. Use + infer_sve_type. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Update expected error + messages. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 87 ++++++++++++------- + gcc/config/aarch64/aarch64-sve-builtins.h | 3 + + .../aarch64/sve/acle/general-c/adr_index_1.c | 6 +- + .../aarch64/sve/acle/general-c/adr_offset_1.c | 6 +- + .../aarch64/sve/acle/general-c/binary_1.c | 2 +- + .../sve/acle/general-c/binary_int_opt_n.c | 2 +- + .../sve/acle/general-c/binary_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 2 +- + .../aarch64/sve/acle/general-c/binary_n_1.c | 2 +- + .../acle/general-c/binary_narrowb_opt_n_1.c | 2 +- + .../acle/general-c/binary_narrowt_opt_n_1.c | 4 +- + .../sve/acle/general-c/binary_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_opt_n_3.c | 2 +- + .../sve/acle/general-c/binary_rotate_1.c | 4 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +- + .../sve/acle/general-c/binary_uint64_n_1.c | 2 +- + .../acle/general-c/binary_uint64_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_uint_1.c | 2 +- + .../sve/acle/general-c/binary_uint_n_1.c | 2 +- + .../sve/acle/general-c/binary_uint_opt_n_1.c | 2 +- + .../sve/acle/general-c/binary_wide_1.c | 8 +- + .../sve/acle/general-c/binary_wide_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/clast_1.c | 4 +- + .../aarch64/sve/acle/general-c/compare_1.c | 4 +- + .../sve/acle/general-c/compare_opt_n_1.c | 2 +- + .../sve/acle/general-c/compare_wide_opt_n_1.c | 2 +- + .../sve/acle/general-c/count_vector_1.c | 2 +- + .../aarch64/sve/acle/general-c/create_1.c | 4 +- + .../aarch64/sve/acle/general-c/create_3.c | 4 +- + .../aarch64/sve/acle/general-c/create_5.c | 4 +- + .../aarch64/sve/acle/general-c/fold_left_1.c | 4 +- + .../sve/acle/general-c/inc_dec_pred_1.c | 2 +- + .../aarch64/sve/acle/general-c/mmla_1.c | 10 +-- + .../acle/general-c/prefetch_gather_offset_2.c | 2 +- + .../aarch64/sve/acle/general-c/reduction_1.c | 2 +- + .../sve/acle/general-c/reduction_wide_1.c | 2 +- + .../general-c/shift_right_imm_narrowb_1.c | 2 +- + .../shift_right_imm_narrowb_to_uint_1.c | 2 +- + .../general-c/shift_right_imm_narrowt_1.c | 4 +- + .../shift_right_imm_narrowt_to_uint_1.c | 4 +- + 
.../aarch64/sve/acle/general-c/store_1.c | 2 +- + .../aarch64/sve/acle/general-c/store_2.c | 2 +- + .../acle/general-c/store_scatter_offset_1.c | 4 +- + .../sve/acle/general-c/ternary_bfloat16_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_lane_1.c | 2 +- + .../general-c/ternary_bfloat16_lanex2_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_opt_n_1.c | 2 +- + .../general-c/ternary_intq_uintq_lane_1.c | 6 +- + .../general-c/ternary_intq_uintq_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_lane_1.c | 6 +- + .../acle/general-c/ternary_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_long_lane_1.c | 6 +- + .../sve/acle/general-c/ternary_long_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_qq_lane_1.c | 6 +- + .../acle/general-c/ternary_qq_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_qq_opt_n_2.c | 4 +- + .../sve/acle/general-c/ternary_qq_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_rotate_1.c | 6 +- + .../general-c/ternary_shift_right_imm_1.c | 4 +- + .../sve/acle/general-c/ternary_uint_1.c | 6 +- + .../sve/acle/general-c/ternary_uintq_intq_1.c | 6 +- + .../general-c/ternary_uintq_intq_lane_1.c | 6 +- + .../general-c/ternary_uintq_intq_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/tmad_1.c | 4 +- + .../aarch64/sve/acle/general-c/unary_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_2.c | 2 +- + .../sve/acle/general-c/unary_convert_1.c | 2 +- + .../sve/acle/general-c/unary_convert_2.c | 2 +- + .../acle/general-c/unary_convert_narrowt_1.c | 2 +- + .../sve/acle/general-c/unary_narrowb_1.c | 2 +- + .../acle/general-c/unary_narrowb_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_narrowt_1.c | 4 +- + .../acle/general-c/unary_narrowt_to_uint_1.c | 4 +- + .../sve/acle/general-c/unary_to_int_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_2.c | 2 +- + .../sve/acle/general-c/unary_to_uint_3.c | 2 +- + .../aarch64/sve/acle/general-c/unary_uint_1.c | 2 +- + .../sve/acle/general-c/unary_widen_1.c | 4 +- + 81 files changed, 195 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index cc676bfe1..4e94e3633 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1228,6 +1228,32 @@ function_resolver::scalar_argument_p (unsigned int i) + || SCALAR_FLOAT_TYPE_P (type)); + } + ++/* Report that argument ARGNO was expected to have NUM_VECTORS vectors. ++ TYPE is the type that ARGNO actually has. */ ++void ++function_resolver::report_incorrect_num_vectors (unsigned int argno, ++ sve_type type, ++ unsigned int num_vectors) ++{ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ get_vector_type (type), argno + 1, fndecl); ++ else if (type.num_vectors == 1 ++ && type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ get_vector_type (type), argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", get_vector_type (type), ++ argno + 1, fndecl, num_vectors); ++} ++ + /* Report that the function has no form that takes type TYPE. + Return error_mark_node. */ + tree +@@ -1372,6 +1398,30 @@ find_sve_type (const_tree type) + return {}; + } + ++/* Require argument ARGNO to be an SVE type (i.e. something that can be ++ represented by sve_type). Return the (valid) type if it is, otherwise ++ report an error and return an invalid type. */ ++sve_type ++function_resolver::infer_sve_type (unsigned int argno) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return {}; ++ ++ if (sve_type type = find_sve_type (actual)) ++ return type; ++ ++ if (scalar_argument_p (argno)) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type rather than a scalar type", ++ actual, argno + 1, fndecl); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type", ++ actual, argno + 1, fndecl); ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1380,41 +1430,14 @@ type_suffix_index + function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) + { +- tree actual = get_argument_type (argno); +- if (actual == error_mark_node) ++ auto type = infer_sve_type (argno); ++ if (!type) + return NUM_TYPE_SUFFIXES; + +- if (auto sve_type = find_sve_type (actual)) +- { +- if (sve_type.num_vectors == num_vectors) +- return sve_type.type;
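For reference, a sketch of the diagnostics this resolver change produces, based on the testsuite expectations updated by the patch (not a test from the tree; the exact wording of the third case is inferred from report_incorrect_num_vectors, so treat it as approximate):

/* Compile for an SVE target; each call is expected to be rejected.  */
#include <arm_sve.h>

void
f (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2)
{
  svzip1 (u8, 0);     /* passing 'int' to argument 2 of 'svzip1', which
			 expects an SVE type rather than a scalar type */
  svget2 (u8, 0);     /* passing 'svuint8_t' to argument 1 of 'svget2',
			 which expects a tuple of 2 vectors */
  svabs_x (pg, u8x2); /* passing 'svuint8x2_t' to argument 2 of 'svabs_x',
			 which expects a single SVE vector rather than
			 a tuple */
}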
_service:tar_scm:0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch
Added
@@ -0,0 +1,120 @@
+From 3bb46830b0f92f54d1ef529796348c0a86504065 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 26 Apr 2024 15:59:11 +0800
+Subject: [PATCH 166/188] LoongArch: Add constraints for bit string operation
+ define_insn_and_split's [PR114861]
+
+Without the constraints, the compiler attempts to use a stack slot as the
+target, causing an ICE building the kernel with -Os:
+
+    drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c:3144:1:
+       error: could not split insn
+    (insn:TI 1764 67 1745
+       (set (mem/c:DI (reg/f:DI 3 $r3) [707 %sfp+-80 S8 A64])
+            (and:DI (reg/v:DI 28 $r28 [orig:422 raster_config ] [422])
+                    (const_int -50331649 [0xfffffffffcffffff])))
+       "drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c":1386:21 111
+       {*bstrins_di_for_mask}
+       (nil))
+
+Add these constraints to fix the issue.
+
+gcc/ChangeLog:
+
+	PR target/114861
+	* config/loongarch/loongarch.md (bstrins_<mode>_for_mask): Add
+	constraints for operands.
+	(bstrins_<mode>_for_ior_mask): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/114861
+	* gcc.target/loongarch/pr114861.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 16 ++++----
+ gcc/testsuite/gcc.target/loongarch/pr114861.c | 39 +++++++++++++++++++
+ 2 files changed, 47 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr114861.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 95beb88fe..20494ce8a 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1543,9 +1543,9 @@
+    (set_attr "mode" "<MODE>")])
+ 
+ (define_insn_and_split "*bstrins_<mode>_for_mask"
+-  [(set (match_operand:GPR 0 "register_operand")
+-	(and:GPR (match_operand:GPR 1 "register_operand")
+-		 (match_operand:GPR 2 "ins_zero_bitmask_operand")))]
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(and:GPR (match_operand:GPR 1 "register_operand" "r")
++		 (match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))]
+   ""
+   "#"
+   ""
+@@ -1563,11 +1563,11 @@
+ })
+ 
+ (define_insn_and_split "*bstrins_<mode>_for_ior_mask"
+-  [(set (match_operand:GPR 0 "register_operand")
+-	(ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
+-			  (match_operand:GPR 2 "const_int_operand"))
+-		 (and:GPR (match_operand:GPR 3 "register_operand")
+-			  (match_operand:GPR 4 "const_int_operand"))))]
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r")
++			  (match_operand:GPR 2 "const_int_operand" "i"))
++		 (and:GPR (match_operand:GPR 3 "register_operand" "r")
++			  (match_operand:GPR 4 "const_int_operand" "i"))))]
+   "loongarch_pre_reload_split ()
+    && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+   "#"
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr114861.c b/gcc/testsuite/gcc.target/loongarch/pr114861.c
+new file mode 100644
+index 000000000..e6507c406
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr114861.c
+@@ -0,0 +1,39 @@
++/* PR114861: ICE building the kernel with -Os
++   Reduced from linux/fs/ntfs3/attrib.c at revision c942a0cd3603.  */
++/* { dg-do compile } */
++/* { dg-options "-Os -march=loongarch64 -msoft-float -mabi=lp64s" } */
++
++long evcn, attr_collapse_range_vbo, attr_collapse_range_bytes;
++unsigned short flags;
++int attr_collapse_range_ni_0_0;
++int *attr_collapse_range_mi;
++unsigned attr_collapse_range_svcn, attr_collapse_range_vcn1;
++void ni_insert_nonresident (unsigned, unsigned short, int **);
++int mi_pack_runs (int);
++int
++attr_collapse_range (void)
++{
++  _Bool __trans_tmp_1;
++  int run = attr_collapse_range_ni_0_0;
++  unsigned evcn1, vcn, end;
++  short a_flags = flags;
++  __trans_tmp_1 = flags & (32768 | 1);
++  if (__trans_tmp_1)
++    return 2;
++  vcn = attr_collapse_range_vbo;
++  end = attr_collapse_range_bytes;
++  evcn1 = evcn;
++  for (;;)
++    if (attr_collapse_range_svcn >= end)
++      {
++	unsigned eat, next_svcn = mi_pack_runs (42);
++	attr_collapse_range_vcn1 = (vcn ? vcn : attr_collapse_range_svcn);
++	eat = (0 < end) - attr_collapse_range_vcn1;
++	mi_pack_runs (run - eat);
++	if (next_svcn + eat)
++	  ni_insert_nonresident (evcn1 - eat - next_svcn, a_flags,
++				 &attr_collapse_range_mi);
++      }
++    else
++      return 42;
++}
+--
+2.43.0
+
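A minimal sketch of the source shape that reaches *bstrins_di_for_mask (hypothetical function name; the mask is the one from the ICE above, with a single contiguous run of zero bits):

/* With the "=r"/"r"/"i" constraints the destination is forced into a
   register, so the post-reload split into bstrins always succeeds,
   even at -Os.  */
unsigned long
clear_field (unsigned long x)
{
  return x & 0xfffffffffcffffffUL;	/* bits [25:24] are the zero field */
}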
_service:tar_scm:0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
Added
@@ -0,0 +1,698 @@ +From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:21 +0000 +Subject: PATCH 068/157 BackportSME aarch64: Replace vague "previous + arguments" message + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875 + +If an SVE ACLE intrinsic requires two arguments to have the +same type, the C resolver would report mismatches as "argument N +has type T2, but previous arguments had type T1". This patch makes +the message say which argument had type T1. + +This is needed to give decent error messages for some SME cases. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::require_matching_vector_type): Add a parameter + that specifies the number of the earlier argument that is being + matched against. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::require_matching_vector_type): Likewise. + (require_derived_vector_type): Update calls accordingly. + (function_resolver::resolve_unary): Likewise. + (function_resolver::resolve_uniform): Likewise. + (function_resolver::resolve_uniform_opt_n): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (binary_long_lane_def::resolve): Likewise. + (clast_def::resolve, ternary_uint_def::resolve): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous + arguments had" with "but argument N had". +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------ + gcc/config/aarch64/aarch64-sve-builtins.h | 3 +- + .../aarch64/sve/acle/general-c/binary_1.c | 6 ++-- + .../sve/acle/general-c/binary_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++--- + .../sve/acle/general-c/binary_opt_n_2.c | 14 +++++----- + .../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------ + .../sve/acle/general-c/binary_rotate_1.c | 2 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +-- + .../aarch64/sve/acle/general-c/clast_1.c | 2 +- + .../aarch64/sve/acle/general-c/compare_1.c | 14 +++++----- + .../sve/acle/general-c/compare_opt_n_1.c | 14 +++++----- + .../aarch64/sve/acle/general-c/create_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_3.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_5.c | 6 ++-- + .../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++----- + .../sve/acle/general-c/ternary_lane_1.c | 4 +-- + .../acle/general-c/ternary_lane_rotate_1.c | 4 +-- + .../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++---------- + .../sve/acle/general-c/ternary_rotate_1.c | 4 +-- + .../general-c/ternary_shift_right_imm_1.c | 6 ++-- + .../sve/acle/general-c/ternary_uint_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/tmad_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_1.c | 8 +++--- + .../aarch64/sve/acle/general-c/undeclared_2.c | 2 +- + 29 files changed, 112 insertions(+), 110 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 3ecef026c..40aa418e0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0> + type_suffix_index type, result_type; + if (!r.check_gp_argument (3, i, 
nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_integer_immediate (i + 2) + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; +@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0> + { + type_suffix_index type; + if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type)) ++ || !r.require_matching_vector_type (i + 1, i, type)) + return error_mark_node; + return r.resolve_to (MODE_none, type); + } +@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0> + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 4e94e3633..1545fd78d 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno, + return true; + } + +-/* Like require_vector_type, but TYPE is inferred from previous arguments ++/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO + rather than being a fixed part of the function signature. This changes + the nature of the error messages. */ + bool + function_resolver::require_matching_vector_type (unsigned int argno, ++ unsigned int first_argno, + type_suffix_index type) + { + type_suffix_index new_type = infer_vector_type (argno); +@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno, + if (type != new_type) + { + error_at (location, "passing %qT to argument %d of %qE, but" +- " previous arguments had type %qT", ++ " argument %d had type %qT", + get_vector_type (new_type), argno + 1, fndecl, +- get_vector_type (type)); ++ first_argno + 1, get_vector_type (type)); + return false; + } + return true; +@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno, + { + /* There's no need to resolve this case out of order. */ + gcc_assert (argno > first_argno); +- return require_matching_vector_type (argno, first_type); ++ return require_matching_vector_type (argno, first_argno, first_type); + } + + /* Use FIRST_TYPE to get the expected type class and element size. */ +@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass, + so we can use normal left-to-right resolution. 
*/ + if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !require_vector_type (1, VECTOR_TYPE_svbool_t) +- || !require_matching_vector_type (2, type)) ++ || !require_matching_vector_type (2, 0, type)) + return error_mark_node; + } + else +@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + +- i += 1; ++ unsigned int first_arg = i++; + for (; i < nargs - nimm; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + for (; i < nargs; ++i) +@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops) + + unsigned int first_arg = i++; + for (; i < nargs - 1; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + return finish_opt_n_resolution (i, first_arg, type); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 5a4f35123..f7d6cc084 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -476,7 +476,8 @@ public: + bool require_vector_or_scalar_type (unsigned int); + + bool require_vector_type (unsigned int, vector_type_index); +- bool require_matching_vector_type (unsigned int, type_suffix_index); ++ bool require_matching_vector_type (unsigned int, unsigned int, ++ type_suffix_index); + bool require_derived_vector_type (unsigned int, unsigned int, + type_suffix_index, + type_class_index = SAME_TYPE_CLASS, +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +index 4343146de..2e919d287 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +@@ -7,8 +7,8 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16) + { + svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ + svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ +- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ +- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ +- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ ++ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */ ++ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ ++ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ + svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +index 10b6b7e81..81533b25d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
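The effect of the new wording, shown as a sketch matching the binary_1.c expectations above:

#include <arm_sve.h>

void
f (svuint8_t u8, svint16_t s16)
{
  /* Old: "... but previous arguments had type 'svuint8_t'"
     New: "... but argument 1 had type 'svuint8_t'"  */
  svzip1 (u8, s16); /* passing 'svint16_t' to argument 2 of 'svzip1',
		       but argument 1 had type 'svuint8_t' */
}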
_service:tar_scm:0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch
Added
@@ -0,0 +1,67 @@
+From be1397b598a436d562e6a35a13ed2ae695531255 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 22 May 2024 09:29:43 +0800
+Subject: [PATCH 167/188] LoongArch: Guard REGNO with REG_P in
+ loongarch_expand_conditional_move [PR115169]
+
+gcc/ChangeLog:
+
+	PR target/115169
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_conditional_move): Guard REGNO with REG_P.
+---
+ gcc/config/loongarch/loongarch.cc | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c86a0856b..0c2c38f6f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5341,6 +5341,7 @@ loongarch_expand_conditional_move (rtx *operands)
+   rtx op1_extend = op1;
+ 
+   /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
++  bool promote_op[2] = {false, false};
+   bool promote_p = false;
+   machine_mode mode = GET_MODE (operands[0]);
+ 
+@@ -5348,9 +5349,15 @@ loongarch_expand_conditional_move (rtx *operands)
+     loongarch_emit_float_compare (&code, &op0, &op1);
+   else
+     {
+-      if ((REGNO (op0) == REGNO (operands[2])
+-	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+-	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
++      if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++	{
++	  promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
++			   REGNO (op0) == REGNO (operands[2]));
++	  promote_op[1] = (REG_P (op1) && REG_P (operands[3]) &&
++			   REGNO (op1) == REGNO (operands[3]));
++	}
++
++      if (promote_op[0] || promote_op[1])
+ 	{
+ 	  mode = word_mode;
+ 	  promote_p = true;
+@@ -5392,7 +5399,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ 
+   if (promote_p)
+     {
+-      if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
++      if (promote_op[0])
+ 	op2 = op0_extend;
+       else
+ 	{
+@@ -5400,7 +5407,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ 	  op2 = force_reg (mode, op2);
+ 	}
+ 
+-      if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
++      if (promote_op[1])
+ 	op3 = op1_extend;
+       else
+ 	{
+--
+2.43.0
+
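A standalone analogue of the bug class (not GCC code; all names here are invented): an accessor that is only valid for one node kind must be guarded by a kind check, exactly as REG_P now guards REGNO above.

#include <assert.h>
#include <stdio.h>

enum rtx_kind { REG, CONST_INT, MEM };

struct rtx_like
{
  enum rtx_kind kind;
  int payload;			/* register number for REG nodes */
};

static int
regno (const struct rtx_like *x)
{
  assert (x->kind == REG);	/* mirrors the RTL-checking abort */
  return x->payload;
}

int
main (void)
{
  struct rtx_like r = { REG, 4 }, c = { CONST_INT, 2 };

  /* Guarded comparison, the pattern the patch introduces; regno () is
     never reached for the non-REG operand.  */
  int same = (r.kind == REG && c.kind == REG
	      && regno (&r) == regno (&c));
  printf ("%d\n", same);	/* prints 0, with no assertion failure */
  return 0;
}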
_service:tar_scm:0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
Added
@@ -0,0 +1,368 @@ +From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:21 +0000 +Subject: PATCH 069/157 BackportSME aarch64: Make more use of sve_type in + ACLE code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd + +This patch makes some functions operate on sve_type, rather than just +on type suffixes. It also allows an overload to be resolved based on +a mode and sve_type. In this case the sve_type is used to derive the +group size as well as a type suffix. + +This is needed for the SME2 intrinsics and the new tuple forms of +svreinterpret. No functional change intended on its own. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::lookup_form): Add an overload that takes + an sve_type rather than type and group suffixes. + (function_resolver::resolve_to): Likewise. + (function_resolver::infer_vector_or_tuple_type): Return an sve_type. + (function_resolver::infer_tuple_type): Likewise. + (function_resolver::require_matching_vector_type): Take an sve_type + rather than a type_suffix_index. + (function_resolver::require_derived_vector_type): Likewise. + * config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group): + New function. + (function_resolver::lookup_form): Add an overload that takes + an sve_type rather than type and group suffixes. + (function_resolver::resolve_to): Likewise. + (function_resolver::infer_vector_or_tuple_type): Return an sve_type. + (function_resolver::infer_tuple_type): Likewise. + (function_resolver::infer_vector_type): Update accordingly. + (function_resolver::require_matching_vector_type): Take an sve_type + rather than a type_suffix_index. + (function_resolver::require_derived_vector_type): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve) + (set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update + calls accordingly. 
+--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 16 +-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++----- + gcc/config/aarch64/aarch64-sve-builtins.h | 12 +- + 3 files changed, 95 insertions(+), 44 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 40aa418e0..f187b4cb2 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1)) + return error_mark_node; + +@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (3, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1) + || !r.require_derived_vector_type (i + 2, i, type)) + return error_mark_node; +@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0> + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) + || !r.require_pointer_type (i) + || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) +- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) ++ || !(type = r.infer_tuple_type (nargs - 1))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); +@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 1545fd78d..e98274f8a 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type) + return NUM_TYPE_SUFFIXES; + } + ++/* Return the implicit group suffix for intrinsics that operate on NVECTORS ++ vectors. */ ++static group_suffix_index ++num_vectors_to_group (unsigned int nvectors) ++{ ++ switch (nvectors) ++ { ++ case 1: return GROUP_none; ++ case 2: return GROUP_x2; ++ case 3: return GROUP_x3; ++ case 4: return GROUP_x4; ++ } ++ gcc_unreachable (); ++} ++ + /* Return the vector type associated with TYPE. */ + static tree + get_vector_type (sve_type type) +@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode, + return rfn ? rfn->decl : NULL_TREE; + } + ++/* Silently check whether there is an instance of the function that has the ++ mode suffix given by MODE and the type and group suffixes implied by TYPE. ++ If the overloaded function has an explicit first type suffix (like ++ conversions do), TYPE describes the implicit second type suffix. 
++ Otherwise, TYPE describes the only type suffix. ++ ++ Return the decl of the function if it exists, otherwise return null. */ ++tree ++function_resolver::lookup_form (mode_suffix_index mode, sve_type type) ++{ ++ type_suffix_index type0 = type_suffix_ids0; ++ type_suffix_index type1 = type_suffix_ids1; ++ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type; ++ ++ group_suffix_index group = group_suffix_id; ++ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ()) ++ group = num_vectors_to_group (type.num_vectors); ++ ++ return lookup_form (mode, type0, type1, group); ++} ++ + /* Resolve the function to one with the mode suffix given by MODE, the + type suffixes given by TYPE0 and TYPE1, and group suffix given by + GROUP. Return its function decl on success, otherwise report an +@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode, + return res; + } + ++/* Resolve the function to one that has the suffixes associated with MODE ++ and TYPE; see lookup_form for how TYPE is interpreted. Return the ++ function decl on success, otherwise report an error and return ++ error_mark_node. */ ++tree ++function_resolver::resolve_to (mode_suffix_index mode, sve_type type) ++{ ++ if (tree res = lookup_form (mode, type)) ++ return res; ++ ++ return report_no_such_form (type); ++} ++ + /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. + Return the associated type suffix on success, otherwise report an + error and return NUM_TYPE_SUFFIXES. */ +@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno) + + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type +- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +- and return NUM_TYPE_SUFFIXES on failure. */ +-type_suffix_index ++ on success. Report an error on failure. */ ++sve_type + function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) + { + auto type = infer_sve_type (argno); + if (!type) +- return NUM_TYPE_SUFFIXES; ++ return type; + + if (type.num_vectors == num_vectors) +- return type.type; ++ return type; + + report_incorrect_num_vectors (argno, type, num_vectors); +- return NUM_TYPE_SUFFIXES; ++ return {}; + }
_service:tar_scm:0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch
Added
@@ -0,0 +1,36 @@
+From 7675f45536691eeca7d8163020c9bfb127d5ee4f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 12 Jun 2024 11:01:53 +0800
+Subject: [PATCH 168/188] LoongArch: Fix mode size comparison in
+ loongarch_expand_conditional_move
+
+We were comparing a mode size with word_mode, but word_mode is an enum
+value, thus this does not really make any sense.  (Un)luckily E_DImode
+happens to be 8 so this seemed to work, but let's make it correct so it
+won't blow up when we add LA32 support or add another machine mode...
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_conditional_move): Compare mode size with
+	UNITS_PER_WORD instead of word_mode.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 0c2c38f6f..77f83ab9e 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5349,7 +5349,7 @@ loongarch_expand_conditional_move (rtx *operands)
+     loongarch_emit_float_compare (&code, &op0, &op1);
+   else
+     {
+-      if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++      if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
+ 	{
+ 	  promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
+--
+2.43.0
+
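A standalone illustration (not GCC code; the *_like names are invented) of why the old comparison only worked by accident: word_mode is an enumerator, and per the commit message E_DImode happens to have the integer value 8, which is also the 64-bit word size in bytes.

#include <stdio.h>

enum mode_like { E_SImode_like = 7, E_DImode_like = 8 };
#define UNITS_PER_WORD_LIKE 8	/* bytes per word: the correct yardstick */

int
main (void)
{
  int size_in_bytes = 4;	/* e.g. a 32-bit operand */

  /* Accidentally right: true only because the enumerator's value is 8.  */
  printf ("%d\n", size_in_bytes < E_DImode_like);

  /* Deliberately right: bytes compared with bytes.  */
  printf ("%d\n", size_in_bytes < UNITS_PER_WORD_LIKE);
  return 0;
}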
_service:tar_scm:0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
Added
@@ -0,0 +1,106 @@ +From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:22 +0000 +Subject: PATCH 070/157 BackportSME aarch64: Tweak error message for + (tuple,vector) pairs + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958 + +SME2 adds more intrinsics that take a tuple of vectors followed +by a single vector, with the two arguments expected to have the +same element type. Unlike with the existing svset* intrinsics, +the size of the tuple is not fixed by the overloaded function name. + +This patch adds an error message that (hopefully) copes better +with that combination. + +gcc/ + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::require_derived_vector_type): Add a specific + error message for the case in which the caller wants a single + vector whose element type matches a previous tuyple argument. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected + error message. + * gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++ + .../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++-- + 4 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index e98274f8a..9224916a7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno, + if (!actual_type) + return false; + ++ if (orig_expected_tclass == SAME_TYPE_CLASS ++ && orig_expected_bits == SAME_SIZE) ++ { ++ if (actual_type.type == first_type.type) ++ return true; ++ ++ error_at (location, "passing %qT to argument %d of %qE, but" ++ " argument %d was a tuple of %qT", ++ get_vector_type (actual_type), argno + 1, fndecl, ++ first_argno + 1, get_vector_type (first_type.type)); ++ return false; ++ } ++ + /* Exit now if we got the right type. 
*/ + auto &actual_type_suffix = type_suffixesactual_type.type; + bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +index f07c76102..f2a6da536 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) + u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ + u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ + u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */ +- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ +- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ ++ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ ++ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ + u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ + u8x2 = svset2 (u8x2, 0, u8); + f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +index 543a1bea8..92b955f83 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, + f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ + f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ + f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */ +- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ +- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ ++ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ ++ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ + f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ + f16x3 = svset3 (f16x3, 0, f16); + f64 = svset3 (f16x3, 0, 
f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +index be911a731..f0696fb07 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) + s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ +- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ +- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ ++ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ ++ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ + s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ + s32x4 = svset4 (s32x4, 0, s32); + f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ +-- +2.33.0 +
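A sketch of the new message in context, matching the set_1.c expectations above:

#include <arm_sve.h>

void
f (svuint8x2_t u8x2, svfloat64_t f64)
{
  svset2 (u8x2, 0, f64); /* passing 'svfloat64_t' to argument 3 of
			    'svset2', but argument 1 was a tuple of
			    'svuint8_t' */
}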
_service:tar_scm:0169-LoongArch-Use-bstrins-for-value-1u-const.patch
Added
@@ -0,0 +1,135 @@
+From 7e34bede110bfa7b2f91dc657c41ed0e7b4b11f7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 9 Jun 2024 14:43:48 +0800
+Subject: [PATCH 169/188] LoongArch: Use bstrins for "value & (-1u << const)"
+
+A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and
+pair, and twice fast as a srli/slli pair.  When the src reg and the dst
+reg happens to be the same, the move instruction can be optimized away.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (high_bitmask_operand): New
+	predicate.
+	* config/loongarch/constraints.md (Yy): New constraint.
+	* config/loongarch/loongarch.md (and<mode>3_align): New
+	define_insn_and_split.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/bstrins-1.c: New test.
+	* gcc.target/loongarch/bstrins-2.c: New test.
+---
+ gcc/config/loongarch/constraints.md            |  5 +++++
+ gcc/config/loongarch/loongarch.md              | 17 +++++++++++++++++
+ gcc/config/loongarch/predicates.md             |  4 ++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-1.c |  9 +++++++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++
+ 5 files changed, 49 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+
+diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
+index cec5d8857..f3f5776da 100644
+--- a/gcc/config/loongarch/constraints.md
++++ b/gcc/config/loongarch/constraints.md
+@@ -94,6 +94,7 @@
+ ;;    "A constant @code{move_operand} that can be safely loaded using
+ ;;     @code{la}."
+ ;; "Yx"
++;; "Yy"
+ ;; "Z" -
+ ;;    "ZC"
+ ;;      "A memory operand whose address is formed by a base register and offset
+@@ -291,6 +292,10 @@
+   "@internal"
+   (match_operand 0 "low_bitmask_operand"))
+ 
++(define_constraint "Yy"
++  "@internal"
++  (match_operand 0 "high_bitmask_operand"))
++
+ (define_constraint "YI"
+   "@internal
+    A replicated vector const in which the replicated value is in the range
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 20494ce8a..55a759850 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1542,6 +1542,23 @@
+   [(set_attr "move_type" "pick_ins")
+    (set_attr "mode" "<MODE>")])
+ 
++(define_insn_and_split "and<mode>3_align"
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(and:GPR (match_operand:GPR 1 "register_operand" "r")
++		 (match_operand:GPR 2 "high_bitmask_operand" "Yy")))]
++  ""
++  "#"
++  ""
++  [(set (match_dup 0) (match_dup 1))
++   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0))
++	(const_int 0))]
++{
++  int len;
++
++  len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2]));
++  operands[2] = GEN_INT (len);
++})
++
+ (define_insn_and_split "*bstrins_<mode>_for_mask"
+   [(set (match_operand:GPR 0 "register_operand" "=r")
+ 	(and:GPR (match_operand:GPR 1 "register_operand" "r")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 1d9a30695..95be8a4fe 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -293,6 +293,10 @@
+   (and (match_code "const_int")
+        (match_test "low_bitmask_len (mode, INTVAL (op)) > 12")))
+ 
++(define_predicate "high_bitmask_operand"
++  (and (match_code "const_int")
++       (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0")))
++
+ (define_predicate "d_operand"
+   (and (match_code "reg")
+        (match_test "GP_REG_P (REGNO (op))")))
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+new file mode 100644
+index 000000000..7cb3a9523
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */
++
++long
++x (long a)
++{
++  return a & -32;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+new file mode 100644
+index 000000000..9777f502e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]\+,\\\$r0,4,0" } } */
++
++struct aligned_buffer {
++  _Alignas(32) char x[1024];
++};
++
++extern int f(char *);
++int g(void)
++{
++  struct aligned_buffer buf;
++  return f(buf.x);
++}
+--
+2.43.0
+
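A standalone check (hypothetical example, not part of the patch) of the identity the new pattern relies on: ANDing with a high bitmask -1 << k is the same as inserting k zero bits at the bottom, which "bstrins.d rd,$r0,k-1,0" performs in place after a (possibly eliminated) move.

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t a = 0x123456789abcdef0;
  int k = 5;					/* -32 == -1 << 5 */

  int64_t by_and = a & (int64_t) -32;		/* the source-level form */
  int64_t by_insert = a & ~(((int64_t) 1 << k) - 1); /* zero bits [k-1:0] */

  printf ("%d\n", by_and == by_insert);		/* prints 1 */
  return 0;
}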
_service:tar_scm:0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
Added
@@ -0,0 +1,1236 @@ +From 95234ef07c47dda7ac6a13f75619580a6683118c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:22 +0000 +Subject: PATCH 071/157 BackportSME aarch64: Add tuple forms of + svreinterpret + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ce9dc263c2f6d455b2013fc58932beda2a4ae92 + +SME2 adds a number of intrinsics that operate on tuples of 2 and 4 +vectors. The ACLE therefore extends the existing svreinterpret +intrinsics to handle tuples as well. + +gcc/ + * config/aarch64/aarch64-sve-builtins-base.cc + (svreinterpret_impl::fold): Punt on tuple forms. + (svreinterpret_impl::expand): Use tuple_mode instead of vector_mode. + * config/aarch64/aarch64-sve-builtins-base.def (svreinterpret): + Extend to x1234 groups. + * config/aarch64/aarch64-sve-builtins-functions.h + (multi_vector_function::vectors_per_tuple): If the function has + a group suffix, get the number of vectors from there. + * config/aarch64/aarch64-sve-builtins-shapes.h (reinterpret): Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (reinterpret_def) + (reinterpret): New function shape. + * config/aarch64/aarch64-sve-builtins.cc (function_groups): Handle + DEF_SVE_FUNCTION_GS. + * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_FUNCTION_GS): New + macro. + (DEF_SVE_FUNCTION): Forward to DEF_SVE_FUNCTION_GS by default. + * config/aarch64/aarch64-sve-builtins.h + (function_instance::tuple_mode): New member function. + (function_base::vectors_per_tuple): Take the function instance + as argument and get the number from the group suffix. + (function_instance::vectors_per_tuple): Update accordingly. + * config/aarch64/iterators.md (SVE_FULLx2, SVE_FULLx3, SVE_FULLx4) + (SVE_ALL_STRUCT): New mode iterators. + (SVE_STRUCT): Redefine in terms of SVE_FULL*. + * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret<mode>) + (*aarch64_sve_reinterpret<mode>): Extend to SVE structure modes. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_XN): + New macro. + * gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c: Add tests for + tuple forms. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c: Likewise. 
+--- + .../aarch64/aarch64-sve-builtins-base.cc | 5 +- + .../aarch64/aarch64-sve-builtins-base.def | 2 +- + .../aarch64/aarch64-sve-builtins-functions.h | 7 ++- + .../aarch64/aarch64-sve-builtins-shapes.cc | 28 +++++++++ + .../aarch64/aarch64-sve-builtins-shapes.h | 1 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.def | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.h | 20 +++++- + gcc/config/aarch64/aarch64-sve.md | 8 +-- + gcc/config/aarch64/iterators.md | 26 +++++--- + .../aarch64/sve/acle/asm/reinterpret_bf16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +++++ + 23 files changed, 851 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c9bf13792..53f3f28f9 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1928,6 +1928,9 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { ++ if (f.vectors_per_tuple () > 1) ++ return NULL; ++ + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. 
*/ + if (!targetm.can_change_mode_class (f.vector_mode (0), +@@ -1944,7 +1947,7 @@ public: + rtx + expand (function_expander &e) const OVERRIDE + { +- machine_mode mode = e.vector_mode (0); ++ machine_mode mode = e.tuple_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index 3a58f76c3..756469959 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -248,7 +248,7 @@ DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) + DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) + DEF_SVE_FUNCTION (svrecps, binary, all_float, none) + DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) +-DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) ++DEF_SVE_FUNCTION_GS (svreinterpret, reinterpret, reinterpret, x1234, none) + DEF_SVE_FUNCTION (svrev, unary, all_data, none) + DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) + DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +index 9d346b6ff..94a6d1207 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +@@ -59,8 +59,13 @@ public: + : m_vectors_per_tuple (vectors_per_tuple) {} + + unsigned int +- vectors_per_tuple () const OVERRIDE ++ vectors_per_tuple (const function_instance &fi) const override + { ++ if (fi.group_suffix_id != GROUP_none) ++ { ++ gcc_checking_assert (m_vectors_per_tuple == 1); ++ return fi.group_suffix ().vectors_per_tuple; ++ } + return m_vectors_per_tuple; + } + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index f187b4cb2..95e40d8f3 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -2400,6 +2400,34 @@ struct reduction_wide_def : public overloaded_base<0> + }; + SHAPE (reduction_wide) + ++/* sv<t0>x<g>_t svfoo_t0_t1_g(sv<t1>x<g>_t) ++ ++ where the target type <t0> must be specified explicitly but the source ++ type <t1> can be inferred. 
*/ ++struct reinterpret_def : public overloaded_base<1> ++{ ++ bool explicit_group_suffix_p () const override { return false; } ++ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t0,t1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const override ++ { ++ sve_type type; ++ if (!r.check_num_arguments (1) ++ || !(type = r.infer_sve_type (0))) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (reinterpret) ++ + /* sv<t0>xN_t svfoo_t0(sv<t0>xN_t, uint64_t, sv<t0>_t) + + where the second argument is an integer constant expression in the +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +index 3b0025f85..2b06152d4 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +@@ -133,6 +133,7 @@ namespace aarch64_sve + extern const function_shape *const rdffr; + extern const function_shape *const reduction; + extern const function_shape *const reduction_wide; ++ extern const function_shape *const reinterpret; + extern const function_shape *const set; + extern const function_shape *const setffr; + extern const function_shape *const shift_left_imm_long; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 9224916a7..c439f2e8a 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -494,6 +494,10 @@ static const group_suffix_index groups_none = { + GROUP_none, NUM_GROUP_SUFFIXES + }; + ++static const group_suffix_index groups_x1234 = { ++ GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES ++};
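A usage sketch of the new tuple forms (the intrinsic names follow the ACLE pattern exercised by the new reinterpret_*.c tests; treat the explicit name as an assumption of that pattern): a whole x2 tuple is reinterpreted in one call instead of vector by vector.

#include <arm_sve.h>

svuint8x2_t
f (svint32x2_t x)
{
  return svreinterpret_u8 (x);	/* overloaded form; the explicit name
				   would be svreinterpret_u8_s32_x2 */
}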
_service:tar_scm:0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch
Added
@@ -0,0 +1,158 @@
+From bdc189d43ef38ea53823120de8008f39ead0618d Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 15 Jun 2024 18:29:43 +0800
+Subject: [PATCH 170/188] LoongArch: Tweak IOR rtx_cost for bstrins
+
+Consider
+
+    c &= 0xfff;
+    a &= ~0xfff;
+    b &= ~0xfff;
+    a |= c;
+    b |= c;
+
+This can be done with 2 bstrins instructions.  But we need to recognize
+it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff"
+forward.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc:
+	(loongarch_use_bstrins_for_ior_with_mask): Split the main logic
+	into ...
+	(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
+	(loongarch_rtx_costs): Special case for IOR those can be
+	implemented with bstrins.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/bstrins-3.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc     | 73 ++++++++++++++-----
+ .../gcc.target/loongarch/bstrins-3.c  | 16 ++++
+ 2 files changed, 72 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 77f83ab9e..cd9fa98dc 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -3678,6 +3678,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
+   return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
+ }
+ 
++static int
++loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
++					   unsigned HOST_WIDE_INT mask1,
++					   unsigned HOST_WIDE_INT mask2)
++{
++  if (mask1 != ~mask2 || !mask1 || !mask2)
++    return 0;
++
++  /* Try to avoid a right-shift.  */
++  if (low_bitmask_len (mode, mask1) != -1)
++    return -1;
++
++  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
++    return 1;
++
++  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
++    return -1;
++
++  return 0;
++}
++
+ /* Return the cost of moving between two registers of mode MODE.  */
+ 
+ static int
+@@ -3809,6 +3830,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+       /* Fall through.  */
+ 
+     case IOR:
++      {
++	rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
++	if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
++	    && (mode == SImode || (TARGET_64BIT && mode == DImode)))
++	  {
++	    rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1);
++	    if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
++	      {
++		unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
++		unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
++		if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
++							       mask0,
++							       mask1))
++		  {
++		    /* A bstrins instruction */
++		    *total = COSTS_N_INSNS (1);
++
++		    /* A srai instruction */
++		    if (low_bitmask_len (mode, mask0) == -1
++			&& low_bitmask_len (mode, mask1) == -1)
++		      *total += COSTS_N_INSNS (1);
++
++		    for (int i = 0; i < 2; i++)
++		      *total += set_src_cost (XEXP (op[i], 0), mode, speed);
++
++		    return true;
++		  }
++	      }
++	  }
++      }
++
++      /* Fall through.  */
+     case XOR:
+       /* Double-word operations use two single-word operations.  */
+       *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+@@ -5793,23 +5846,9 @@ bool loongarch_pre_reload_split (void)
+ int
+ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+ {
+-  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
+-  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
+-
+-  if (mask1 != ~mask2 || !mask1 || !mask2)
+-    return 0;
+-
+-  /* Try to avoid a right-shift.  */
+-  if (low_bitmask_len (mode, mask1) != -1)
+-    return -1;
+-
+-  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+-    return 1;
+-
+-  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+-    return -1;
+-
+-  return 0;
++  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
++						    UINTVAL (op[2]),
++						    UINTVAL (op[4]));
+ }
+ 
+ /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+new file mode 100644
+index 000000000..13762bdef
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-rtl-final" } */
++/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
++
++struct X {
++  long a, b;
++};
++
++struct X
++test (long a, long b, long c)
++{
++  c &= 0xfff;
++  a &= ~0xfff;
++  b &= ~0xfff;
++  return (struct X){.a = a | c, .b = b | c};
++}
+--
+2.43.0
+
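A standalone version of the example from the commit message (hypothetical function name; with the cost tweak, both masked ORs should become bstrins instructions, which is what the two-insv scan in bstrins-3.c checks):

long
merge_low_bits (long a, long b, long c, long *out_b)
{
  c &= 0xfff;
  a &= ~0xfff;
  b &= ~0xfff;
  *out_b = b | c;	/* bstrins.d: insert c[11:0] into b */
  return a | c;		/* bstrins.d: insert c[11:0] into a */
}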
View file
_service:tar_scm:0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
Added
@@ -0,0 +1,90 @@ +From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 27 Nov 2023 13:38:16 +0000 +Subject: PATCH 072/157 BackportSME attribs: Use existing traits for + excl_hash_traits + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31 + +excl_hash_traits can be defined more simply by reusing existing traits. + +gcc/ + * attribs.cc (excl_hash_traits): Delete. + (test_attribute_exclusions): Use pair_hash and nofree_string_hash + instead. +--- + gcc/attribs.cc | 45 +++------------------------------------------ + 1 file changed, 3 insertions(+), 42 deletions(-) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index b219f8780..16d05b1da 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -2555,47 +2555,6 @@ namespace selftest + + typedef std::pair<const char *, const char *> excl_pair; + +-struct excl_hash_traits: typed_noop_remove<excl_pair> +-{ +- typedef excl_pair value_type; +- typedef value_type compare_type; +- +- static hashval_t hash (const value_type &x) +- { +- hashval_t h1 = htab_hash_string (x.first); +- hashval_t h2 = htab_hash_string (x.second); +- return h1 ^ h2; +- } +- +- static bool equal (const value_type &x, const value_type &y) +- { +- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second); +- } +- +- static void mark_deleted (value_type &x) +- { +- x = value_type (NULL, NULL); +- } +- +- static const bool empty_zero_p = false; +- +- static void mark_empty (value_type &x) +- { +- x = value_type ("", ""); +- } +- +- static bool is_deleted (const value_type &x) +- { +- return !x.first && !x.second; +- } +- +- static bool is_empty (const value_type &x) +- { +- return !*x.first && !*x.second; +- } +-}; +- +- + /* Self-test to verify that each attribute exclusion is symmetric, + meaning that if attribute A is encoded as incompatible with + attribute B then the opposite relationship is also encoded. +@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove<excl_pair> + static void + test_attribute_exclusions () + { ++ using excl_hash_traits = pair_hash<nofree_string_hash, nofree_string_hash>; ++ + /* Iterate over the array of attribute tables first (with TI0 as + the index) and over the array of attribute_spec in each table + (with SI0 as the index). */ + const size_t ntables = ARRAY_SIZE (attribute_tables); + + /* Set of pairs of mutually exclusive attributes. */ +- typedef hash_set<excl_pair, false, excl_hash_traits> exclusion_set; ++ typedef hash_set<excl_hash_traits> exclusion_set; + exclusion_set excl_set; + + for (size_t ti0 = 0; ti0 != ntables; ++ti0) +-- +2.33.0 +
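For readers unfamiliar with GCC's hash-table traits, the point of this simplification is that a pair-of-strings hash can be composed from existing building blocks instead of hand-writing hash, equality, and sentinel logic. A rough standalone analogy using only the C++ standard library (deliberately not GCC's hash_set/pair_hash API, just the same composition idea):

// Standalone analogy: compose a pair hasher from existing string hashing,
// as the patch composes pair_hash from two nofree_string_hash traits.
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_set>
#include <utility>

struct pair_cstr_hash
{
  size_t operator() (const std::pair<const char *, const char *> &p) const
  {
    size_t h1 = std::hash<std::string_view>{} (p.first);
    size_t h2 = std::hash<std::string_view>{} (p.second);
    return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2));  // combine
  }
};

struct pair_cstr_eq
{
  bool operator() (const std::pair<const char *, const char *> &a,
                   const std::pair<const char *, const char *> &b) const
  {
    return !strcmp (a.first, b.first) && !strcmp (a.second, b.second);
  }
};

using exclusion_set
  = std::unordered_set<std::pair<const char *, const char *>,
                       pair_cstr_hash, pair_cstr_eq>;

// Mirrors what test_attribute_exclusions verifies: every (A, B) pair
// must have a matching (B, A) pair.
bool
exclusions_are_symmetric (const exclusion_set &s)
{
  for (const auto &p : s)
    if (!s.count ({p.second, p.first}))
      return false;
  return true;
}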
View file
_service:tar_scm:0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch
Added
@@ -0,0 +1,44 @@
+From 51c20768fde58093794ff0281c698b6738346313 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 16 Jun 2024 12:22:40 +0800
+Subject: [PATCH 171/188] LoongArch: NFC: Dedup and sort the comment in
+ loongarch_print_operand_reloc
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+	Dedup and sort the comment describing modifiers.
+---
+ gcc/config/loongarch/loongarch.cc | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index cd9fa98dc..35524b5da 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6129,21 +6129,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+    'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+        'z' for (eq:?I ...), 'n' for (ne:?I ...).
+    't'  Like 'T', but with the EQ/NE cases reversed
+-   'F'  Print the FPU branch condition for comparison OP.
+-   'W'  Print the inverse of the FPU branch condition for comparison OP.
+-   'w'  Print a LSX register.
+    'u'  Print a LASX register.
+-   'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+-       'z' for (eq:?I ...), 'n' for (ne:?I ...).
+-   't'  Like 'T', but with the EQ/NE cases reversed
+-   'Y'  Print loongarch_fp_conditions[INTVAL (OP)]
+-   'Z'  Print OP and a comma for 8CC, otherwise print nothing.
+-   'z'  Print $0 if OP is zero, otherwise print OP normally.
+    'v'  Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
+        V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
+    'V'  Print exact log2 of CONST_INT OP element 0 of a replicated
+        CONST_VECTOR in decimal.
+    'W'  Print the inverse of the FPU branch condition for comparison OP.
++   'w'  Print a LSX register.
+    'X'  Print CONST_INT OP in hexadecimal format.
+    'x'  Print the low 16 bits of CONST_INT OP in hexadecimal format.
+    'Y'  Print loongarch_fp_conditions[INTVAL (OP)]
-- 
2.43.0
+
View file
_service:tar_scm:0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
Added
@@ -0,0 +1,2369 @@ +From 82d654912e3671055034e789a8f7110f6d87d447 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 2 Dec 2023 13:49:52 +0000 +Subject: PATCH 073/157 BackportSME Allow target attributes in non-gnu + namespaces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fa24687aa3a683fd105ce5ff6b176f48dca3b6c + +Currently there are four static sources of attributes: + +- LANG_HOOKS_ATTRIBUTE_TABLE +- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE +- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE +- TARGET_ATTRIBUTE_TABLE + +All of the attributes in these tables go in the "gnu" namespace. +This means that they can use the traditional GNU __attribute__((...)) +syntax and the standard gnu::... syntax. + +Standard attributes are registered dynamically with a null namespace. +There are no supported attributes in other namespaces (clang, vendor +namespaces, etc.). + +This patch tries to generalise things by making the namespace +part of the attribute specification. + +It's usual for multiple attributes to be defined in the same namespace, +so rather than adding the namespace to each individual definition, +it seemed better to group attributes in the same namespace together. +This would also allow us to reuse the same table for clang attributes +that are written with the GNU syntax, or other similar situations +where the attribute can be accessed via multiple "spellings". + +The patch therefore adds a scoped_attribute_specs that contains +a namespace and a list of attributes in that namespace. + +It's still possible to have multiple scoped_attribute_specs +for the same namespace. E.g. it makes sense to keep the +C++-specific, C/C++-common, and format-related attributes in +separate tables, even though they're all GNU attributes. + +Current lists of attributes are terminated by a null name. +Rather than keep that for the new structure, it seemed neater +to use an array_slice. This also makes the tables slighly more +compact. + +In general, a target might want to support attributes in multiple +namespaces. Rather than have a separate hook for each possibility +(like the three langhooks above), it seemed better to make +TARGET_ATTRIBUTE_TABLE a table of tables. Specifically, it's +an array_slice of scoped_attribute_specs. + +We can do the same thing for langhooks, which allows the three hooks +above to be merged into a single LANG_HOOKS_ATTRIBUTE_TABLE. +It also allows the standard attributes to be registered statically +and checked by the usual attribs.cc checks. + +The patch adds a TARGET_GNU_ATTRIBUTES helper for the common case +in which a target wants a single table of gnu attributes. It can +only be used if the table is free of preprocessor directives. + +There are probably other things we need to do to make vendor namespaces +work smoothly. E.g. in principle it would be good to make exclusion +sets namespace-aware. But to some extent we have that with standard +vs. gnu attributes too. This patch is just supposed to be a first step. + +gcc/ + * attribs.h (scoped_attribute_specs): New structure. + (register_scoped_attributes): Take a reference to a + scoped_attribute_specs instead of separate namespace and array + parameters. + * plugin.h (register_scoped_attributes): Likewise. + * attribs.cc (register_scoped_attributes): Likewise. + (attribute_tables): Change into an array of scoped_attribute_specs + pointers. Reduce to 1 element for frontends and 1 element for targets. + (empty_attribute_table): Delete. 
+ (check_attribute_tables): Update for changes to attribute_tables. + Use a hash_set to identify duplicates. + (handle_ignored_attributes_option): Update for above changes. + (init_attributes): Likewise. + (excl_pair): Delete. + (test_attribute_exclusions): Update for above changes. Don't + enforce symmetry for standard attributes in the top-level namespace. + * langhooks-def.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Likewise. + (LANG_HOOKS_INITIALIZER): Update accordingly. + (LANG_HOOKS_ATTRIBUTE_TABLE): Define to an empty constructor. + * langhooks.h (lang_hooks::common_attribute_table): Delete. + (lang_hooks::format_attribute_table): Likewise. + (lang_hooks::attribute_table): Redefine to an array of + scoped_attribute_specs pointers. + * target-def.h (TARGET_GNU_ATTRIBUTES): New macro. + * target.def (attribute_spec): Redefine to return an array of + scoped_attribute_specs pointers. + * tree-inline.cc (function_attribute_inlinable_p): Update accordingly. + * doc/tm.texi: Regenerate. + * config/aarch64/aarch64.cc (aarch64_attribute_table): Define using + TARGET_GNU_ATTRIBUTES. + * config/alpha/alpha.cc (vms_attribute_table): Likewise. + * config/avr/avr.cc (avr_attribute_table): Likewise. + * config/bfin/bfin.cc (bfin_attribute_table): Likewise. + * config/bpf/bpf.cc (bpf_attribute_table): Likewise. + * config/csky/csky.cc (csky_attribute_table): Likewise. + * config/epiphany/epiphany.cc (epiphany_attribute_table): Likewise. + * config/gcn/gcn.cc (gcn_attribute_table): Likewise. + * config/h8300/h8300.cc (h8300_attribute_table): Likewise. + * config/loongarch/loongarch.cc (loongarch_attribute_table): Likewise. + * config/m32c/m32c.cc (m32c_attribute_table): Likewise. + * config/m32r/m32r.cc (m32r_attribute_table): Likewise. + * config/m68k/m68k.cc (m68k_attribute_table): Likewise. + * config/mcore/mcore.cc (mcore_attribute_table): Likewise. + * config/microblaze/microblaze.cc (microblaze_attribute_table): + Likewise. + * config/mips/mips.cc (mips_attribute_table): Likewise. + * config/msp430/msp430.cc (msp430_attribute_table): Likewise. + * config/nds32/nds32.cc (nds32_attribute_table): Likewise. + * config/nvptx/nvptx.cc (nvptx_attribute_table): Likewise. + * config/riscv/riscv.cc (riscv_attribute_table): Likewise. + * config/rl78/rl78.cc (rl78_attribute_table): Likewise. + * config/rx/rx.cc (rx_attribute_table): Likewise. + * config/s390/s390.cc (s390_attribute_table): Likewise. + * config/sh/sh.cc (sh_attribute_table): Likewise. + * config/sparc/sparc.cc (sparc_attribute_table): Likewise. + * config/stormy16/stormy16.cc (xstormy16_attribute_table): Likewise. + * config/v850/v850.cc (v850_attribute_table): Likewise. + * config/visium/visium.cc (visium_attribute_table): Likewise. + * config/arc/arc.cc (arc_attribute_table): Likewise. Move further + down file. + * config/arm/arm.cc (arm_attribute_table): Update for above changes, + using... + (arm_gnu_attributes, arm_gnu_attribute_table): ...these new globals. + * config/i386/i386-options.h (ix86_attribute_table): Delete. + (ix86_gnu_attribute_table): Declare. + * config/i386/i386-options.cc (ix86_attribute_table): Replace with... + (ix86_gnu_attributes, ix86_gnu_attribute_table): ...these two globals. + * config/i386/i386.cc (ix86_attribute_table): Define as an array of + scoped_attribute_specs pointers. + * config/ia64/ia64.cc (ia64_attribute_table): Update for above changes, + using... + (ia64_gnu_attributes, ia64_gnu_attribute_table): ...these new globals. 
+ * config/rs6000/rs6000.cc (rs6000_attribute_table): Update for above + changes, using... + (rs6000_gnu_attributes, rs6000_gnu_attribute_table): ...these new + globals. + +gcc/ada/ + * gcc-interface/gigi.h (gnat_internal_attribute_table): Change + type to scoped_attribute_specs. + * gcc-interface/utils.cc (gnat_internal_attribute_table): Likewise, + using... + (gnat_internal_attributes): ...this as the underlying array. + * gcc-interface/misc.cc (gnat_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + +gcc/c-family/ + * c-common.h (c_common_attribute_table): Replace with... + (c_common_gnu_attribute_table): ...this. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs. + * c-attribs.cc (c_common_attribute_table): Replace with... + (c_common_gnu_attributes, c_common_gnu_attribute_table): ...these + new globals. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs, using... + (c_common_format_attributes): ...this as the underlying array. + +gcc/c/ + * c-tree.h (std_attribute_table): Declare. + * c-decl.cc (std_attribute_table): Change type to + scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (c_init_decl_processing): Remove call to register_scoped_attributes. + * c-objc-common.h (c_objc_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + +gcc/cp/ + * cp-tree.h (cxx_attribute_table): Delete. + (cxx_gnu_attribute_table, std_attribute_table): Declare. + * cp-objcp-common.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + (cp_objcp_attribute_table): New table. + (LANG_HOOKS_ATTRIBUTE_TABLE): Redefine. + * tree.cc (cxx_attribute_table): Replace with... + (cxx_gnu_attributes, cxx_gnu_attribute_table): ...these globals. + (std_attribute_table): Change type to scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (init_tree): Remove call to register_scoped_attributes. + +gcc/d/ + * d-tree.h (d_langhook_attribute_table): Replace with... + (d_langhook_gnu_attribute_table): ...this. + (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs. + * d-attribs.cc (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs, using... + (d_langhook_common_attributes): ...this as the underlying array. + (d_langhook_attribute_table): Replace with...
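The new layout described above is easier to grasp in miniature. The sketch below models the commit message's description with illustrative types; the names and fields are ours, not GCC's exact declarations:

// Minimal model of the new shape: each table carries its namespace, and
// the target/langhook exposes an array of such tables.  Illustrative only.
#include <cstddef>

struct attribute_spec_model
{
  const char *name;        // e.g. "streaming"
  int min_args, max_args;
  // handler, flags, exclusions elided ...
};

struct scoped_attribute_specs_model
{
  const char *ns;                      // "gnu", "arm", or null for [[...]]
  const attribute_spec_model *specs;   // attributes in that namespace
  std::size_t count;                   // array_slice stands in for ptr+len
};

// A target wanting only GNU attributes registers a single scoped table:
static const attribute_spec_model my_gnu_attrs[] = {
  { "noinline_model", 0, 0 },
};
static const scoped_attribute_specs_model my_gnu_table = {
  "gnu", my_gnu_attrs, sizeof my_gnu_attrs / sizeof my_gnu_attrs[0]
};

// ...and the hook returns an array of tables, so several namespaces
// (for example a vendor "arm" table) can coexist:
static const scoped_attribute_specs_model *const my_attribute_tables[] = {
  &my_gnu_table,
};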
View file
_service:tar_scm:0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch
Added
@@ -0,0 +1,45 @@ +From 9503e64bf304d44947791d9ff17d65a6905e59ce Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 28 Jun 2024 15:04:26 +0800 +Subject: PATCH 172/188 LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c + tests. + +After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX. +Cause these two tests to fail. To guarantee that these two tests pass, +add the compilation option '-fno-late-combine-instructions'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: + Add compilation options '-fno-late-combine-instructions'. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise. +--- + .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +- + gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +index 3797556e1..e9eb0d6f7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +index f66903091..fed478458 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +-- +2.43.0 +
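For context, a sketch of the kind of TLS access these tests scan (illustrative source; the described fusion is a reading of the commit message, not verified output):

/* With -mtls-dialect=desc the descriptor call yields a TLS offset that is
   added to the thread pointer with add.d and dereferenced with ld.w; the
   new late-combine pass may fuse that add/load pair into a single ldx.w,
   which is the rewrite -fno-late-combine-instructions suppresses so the
   scan-assembler patterns stay stable.  */
extern __thread int counter;

int
read_counter (void)
{
  return counter;
}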
View file
_service:tar_scm:0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
Added
@@ -0,0 +1,64 @@ +From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Thu, 22 Dec 2022 11:15:47 +0100 +Subject: PATCH 074/157 BackportSME aarch64: Fix plugin header install + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d + +The r13-2943-g11a113d501ff64 made aarch64.h include +aarch64-option-extensions.def, but that file isn't installed +for building plugins. + +On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote: +> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def) +> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be +> for aarch64-opt.h (included via aarch64.opt). +> +> I guess TM_H should also have aarch64-arches.def, since it's included +> for aarch64_feature. + +gcc/Makefile.in has +TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) +and +OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA) +which means that adding something into TM_H when it is already in +OPTIONS_H_EXTRA is a unnecessary. +It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h) +and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def +(included by aarch64.h) aren't needed for options.h, so I think the +right patch would be following. + +2022-12-22 Jakub Jelinek <jakub@redhat.com> + + * config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def, + add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and + aarch64-option-extensions.def. + (OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor + aarch64-tuning-flags.def. +--- + gcc/config/aarch64/t-aarch64 | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index ba74abc0a..6a21a248f 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ b/gcc/config/aarch64/t-aarch64 +@@ -18,11 +18,11 @@ + # along with GCC; see the file COPYING3. If not see + # <http://www.gnu.org/licenses/>. + +-TM_H += $(srcdir)/config/aarch64/aarch64-cores.def ++TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ ++ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ ++ $(srcdir)/config/aarch64/aarch64-option-extensions.def + OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ +- $(srcdir)/config/aarch64/aarch64-arches.def \ +- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ +- $(srcdir)/config/aarch64/aarch64-tuning-flags.def ++ $(srcdir)/config/aarch64/aarch64-arches.def + + $(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true + s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \ +-- +2.33.0 +
View file
_service:tar_scm:0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch
Added
@@ -0,0 +1,70 @@
+From 727b1a2cff9cecd904545895bbf39a89fbf1ea4f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 28 Jun 2024 15:09:48 +0800
+Subject: [PATCH 173/188] LoongArch: Define loongarch_insn_cost and set the
+ cost of movcf2gr and movgr2cf.
+
+The following two FAIL items have been fixed:
+
+FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movcf2fr\\t\\\\\$f\[0-9\]+,\\\\\$fcc
+FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movfr2gr\\\\.s\\t\\\\\$r4
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_insn_cost):
+	New function.
+	(TARGET_INSN_COST): New macro.
+---
+ gcc/config/loongarch/loongarch.cc | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 35524b5da..958e82b86 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4369,6 +4369,33 @@ loongarch_address_cost (rtx addr, machine_mode mode,
+   return loongarch_address_insns (addr, mode, false);
+ }
+ 
++/* Implement TARGET_INSN_COST.  */
++
++static int
++loongarch_insn_cost (rtx_insn *insn, bool speed)
++{
++  rtx x = PATTERN (insn);
++  int cost = pattern_cost (x, speed);
++
++  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
++  if (GET_CODE (x) == SET
++      && GET_MODE (XEXP (x, 0)) == FCCmode)
++    {
++      rtx dest, src;
++      dest = XEXP (x, 0);
++      src = XEXP (x, 1);
++
++      if (REG_P (dest) && REG_P (src))
++	{
++	  if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
++	    cost = loongarch_cost->movcf2gr;
++	  else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
++	    cost = loongarch_cost->movgr2cf;
++	}
++    }
++  return cost;
++}
++
+ /* Return one word of double-word value OP, taking into account the fixed
+    endianness of certain registers.  HIGH_P is true to select the high part,
+    false to select the low part.  */
+@@ -11089,6 +11116,8 @@ loongarch_asm_code_end (void)
+ #define TARGET_RTX_COSTS loongarch_rtx_costs
+ #undef TARGET_ADDRESS_COST
+ #define TARGET_ADDRESS_COST loongarch_address_cost
++#undef TARGET_INSN_COST
++#define TARGET_INSN_COST loongarch_insn_cost
+ #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+ #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+   loongarch_builtin_vectorization_cost
+-- 
+2.43.0
+
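A sketch of source that exercises this path (illustrative, not the testsuite file): a floating-point compare produces its result in an FCC register, and the integer result must end up in a general-purpose register:

/* On LA464 the transfer should stay as movcf2fr + movfr2gr.s (through an
   FP register) rather than being merged into the slower movcf2gr; steering
   that choice is exactly what the new insn-cost hook does.  */
int
fcc_to_gpr (float a, float b)
{
  return a < b;   /* fcmp sets an $fcc register; the result needs $r4 */
}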
View file
_service:tar_scm:0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
Added
@@ -0,0 +1,1178 @@ +From 70b732b4518dd0e44b9e6bfaaad78492b8db8f29 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 075/157 BackportSME aarch64: Add + arm_streaming(_compatible) attributes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2c9a54b4238308b127c3b60b01a591363131e7db + +This patch adds support for recognising the SME arm::streaming +and arm::streaming_compatible attributes. These attributes +respectively describe whether the processor is definitely in +"streaming mode" (PSTATE.SM==1), whether the processor is +definitely not in streaming mode (PSTATE.SM==0), or whether +we don't know at compile time either way. + +As far as the compiler is concerned, this effectively creates three +ISA submodes: streaming mode enables things that are not available +in non-streaming mode, non-streaming mode enables things that not +available in streaming mode, and streaming-compatible mode has to stick +to the common subset. This means that some instructions are conditional +on PSTATE.SM==1 and some are conditional on PSTATE.SM==0. + +I wondered about recording the streaming state in a new variable. +However, the set of available instructions is also influenced by +PSTATE.ZA (added later), so I think it makes sense to view this +as an instance of a more general mechanism. Also, keeping the +PSTATE.SM state in the same flag variable as the other ISA +features makes it possible to sum up the requirements of an +ACLE function in a single value. + +The patch therefore adds a new set of feature flags called "ISA modes". +Unlike the other two sets of flags (optional features and architecture- +level features), these ISA modes are not controlled directly by +command-line parameters or "target" attributes. + +arm::streaming and arm::streaming_compatible are function type attributes +rather than function declaration attributes. This means that we need +to find somewhere to copy the type information across to a function's +target options. The patch does this in aarch64_set_current_function. + +We also need to record which ISA mode a callee expects/requires +to be active on entry. (The same mode is then active on return.) +The patch extends the current UNSPEC_CALLEE_ABI cookie to include +this information, as well as the PCS variant that it recorded +previously. + +The attributes can also be written __arm_streaming and +__arm_streaming_compatible. This has two advantages: it triggers +an error on compilers that don't understand the attributes, and it +eases use on C, where ... attributes were only added in C23. + +gcc/ + * config/aarch64/aarch64-isa-modes.def: New file. + * config/aarch64/aarch64.h: Include it in the feature enumerations. + (AARCH64_FL_SM_STATE, AARCH64_FL_ISA_MODES): New constants. + (AARCH64_FL_DEFAULT_ISA_MODE): Likewise. + (AARCH64_ISA_MODE): New macro. + (CUMULATIVE_ARGS): Add an isa_mode field. + * config/aarch64/aarch64-protos.h (aarch64_gen_callee_cookie): Declare. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + * config/aarch64/aarch64.cc (attr_streaming_exclusions) + (aarch64_gnu_attributes, aarch64_gnu_attribute_table) + (aarch64_arm_attributes, aarch64_arm_attribute_table): New tables. + (aarch64_attribute_table): Redefine to include the gnu and arm + attributes. + (aarch64_fntype_pstate_sm, aarch64_fntype_isa_mode): New functions. + (aarch64_fndecl_pstate_sm, aarch64_fndecl_isa_mode): Likewise. + (aarch64_gen_callee_cookie, aarch64_callee_abi): Likewise. 
+ (aarch64_insn_callee_cookie, aarch64_insn_callee_abi): Use them. + (aarch64_function_arg, aarch64_output_mi_thunk): Likewise. + (aarch64_init_cumulative_args): Initialize the isa_mode field. + (aarch64_output_mi_thunk): Use aarch64_gen_callee_cookie to get + the ABI cookie. + (aarch64_override_options): Add the ISA mode to the feature set. + (aarch64_temporary_target::copy_from_fndecl): Likewise. + (aarch64_fndecl_options, aarch64_handle_attr_arch): Likewise. + (aarch64_set_current_function): Maintain the correct ISA mode. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + (aarch64_comp_type_attributes): Handle arm::streaming and + arm::streaming_compatible. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_streaming and __arm_streaming_compatible. + * config/aarch64/aarch64.md (tlsdesc_small_<mode>): Use + aarch64_gen_callee_cookie to get the ABI cookie. + * config/aarch64/t-aarch64 (TM_H): Add all feature-related .def files. + +gcc/testsuite/ + * gcc.target/aarch64/sme/aarch64-sme.exp: New harness. + * gcc.target/aarch64/sme/streaming_mode_1.c: New test. + * gcc.target/aarch64/sme/streaming_mode_2.c: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme.exp: New harness. + * g++.target/aarch64/sme/streaming_mode_1.C: New test. + * g++.target/aarch64/sme/streaming_mode_2.C: Likewise. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. + * gcc.target/aarch64/auto-init-1.c: Only expect the call insn + to contain 1 (const_int 0), not 2. +--- + gcc/config/aarch64/aarch64-c.cc | 14 ++ + gcc/config/aarch64/aarch64-isa-modes.def | 35 +++ + gcc/config/aarch64/aarch64-protos.h | 3 +- + gcc/config/aarch64/aarch64.cc | 233 +++++++++++++++--- + gcc/config/aarch64/aarch64.h | 24 +- + gcc/config/aarch64/aarch64.md | 3 +- + gcc/config/aarch64/t-aarch64 | 5 +- + .../g++.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../g++.target/aarch64/sme/keyword_macros_1.C | 4 + + .../g++.target/aarch64/sme/streaming_mode_1.C | 142 +++++++++++ + .../g++.target/aarch64/sme/streaming_mode_2.C | 25 ++ + .../gcc.target/aarch64/auto-init-1.c | 3 +- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../gcc.target/aarch64/sme/keyword_macros_1.c | 4 + + .../gcc.target/aarch64/sme/streaming_mode_1.c | 130 ++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_2.c | 25 ++ + 16 files changed, 685 insertions(+), 45 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-isa-modes.def + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 4085ad840..397745fbd 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -72,6 +72,20 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); + + builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); ++ ++ /* Define keyword attributes like 
__arm_streaming as macros that expand
++   to the associated [[...]] attribute.  Use __extension__ in the attribute
++   for C, since the [[...]] syntax was only added in C23.  */
++#define DEFINE_ARM_KEYWORD_MACRO(NAME) \
++  builtin_define_with_value ("__arm_" NAME, \
++			     lang_GNU_CXX () \
++			     ? "[[arm::" NAME "]]" \
++			     : "[[__extension__ arm::" NAME "]]", 0);
++
++  DEFINE_ARM_KEYWORD_MACRO ("streaming");
++  DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
++
++#undef DEFINE_ARM_KEYWORD_MACRO
+ }
+ 
+ /* Undefine/redefine macros that depend on the current backend state and may
+diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def
+new file mode 100644
+index 000000000..5915c98a8
+--- /dev/null
++++ b/gcc/config/aarch64/aarch64-isa-modes.def
+@@ -0,0 +1,35 @@
++/* Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file defines a set of "ISA modes"; in other words, it defines
++   various bits of runtime state that control the set of available
++   instructions or that affect the semantics of instructions in some way.
++
++   Before using #include to read this file, define a macro:
++
++      DEF_AARCH64_ISA_MODE(NAME)
++
++   where NAME is the name of the mode.  */
++
++/* Indicates that PSTATE.SM is known to be 1 or 0 respectively.  These
++   modes are mutually exclusive.  If neither mode is active then the state
++   of PSTATE.SM is not known at compile time.  */
++DEF_AARCH64_ISA_MODE(SM_ON)
++DEF_AARCH64_ISA_MODE(SM_OFF)
++
++#undef DEF_AARCH64_ISA_MODE
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 14a568140..9b03410dc 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -772,6 +772,7 @@ bool aarch64_constant_address_p (rtx);
+ bool aarch64_emit_approx_div (rtx, rtx, rtx);
+ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
+ tree aarch64_vector_load_decl (tree);
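A hypothetical usage sketch, assuming a toolchain carrying these patches; the keyword macros above expand to the corresponding [[arm::...]] attributes on the function type:

/* Declarations using the keyword macros defined by this patch.  */
void fn_streaming (void) __arm_streaming;
void fn_streaming_compatible (void) __arm_streaming_compatible;

void
fn_streaming_compatible (void) __arm_streaming_compatible
{
  /* This body may execute with PSTATE.SM either 0 or 1, so only the
     common streaming-compatible subset of instructions is allowed.  */
}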
View file
_service:tar_scm:0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch
Added
@@ -0,0 +1,73 @@ +From b6b917847a25afbaba9983e80b62b69ed3ce3983 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 10:37:26 +0800 +Subject: PATCH 174/188 LoongArch: TFmode is not allowed to be stored in the + float register. + + PR target/115752 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Replace + UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE. + * config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr115752.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.h | 7 ------- + gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 ++++++++ + 3 files changed, 9 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 958e82b86..b78512e0e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6760,7 +6760,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; ++ return size <= UNITS_PER_HWFPVALUE; + + /* Allow integer modes that fit into a single register. We need + to put integers into FPRs when using instructions like CVT +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6743d2684..a23dabde1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3. If not see + #define UNITS_PER_HWFPVALUE \ + (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG) + +-/* The largest size of value that can be held in floating-point +- registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) +- + /* The number of bytes in a double. */ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c b/gcc/testsuite/gcc.target/loongarch/pr115752.c +new file mode 100644 +index 000000000..df4bae524 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++long double ++test (long double xx) ++{ ++ __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */ ++ return xx + 1; ++} +-- +2.43.0 +
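Put differently: long double on LoongArch64 is a 128-bit TFmode value, while a hardware floating-point register holds at most UNITS_PER_HWFPVALUE (64 bits), so TFmode values live in general-register pairs and only the FPR asm constraint in the new test is rejected. A sketch of what still compiles (our example; the libcall name is an assumption):

/* TFmode arithmetic remains valid; it goes through general registers and
   soft-fp libcalls (presumably __addtf3 here) instead of FP registers.  */
long double
add_one (long double x)
{
  return x + 1.0L;
}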
View file
_service:tar_scm:0175-Backport-SME-aarch64-Add-sme.patch
Added
@@ -0,0 +1,330 @@ +From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 076/157 BackportSME aarch64: Add +sme + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c + +This patch adds the +sme ISA feature and requires it to be present +when compiling arm_streaming code. (arm_streaming_compatible code +does not necessarily assume the presence of SME. It just has to +work when SME is present and streaming mode is enabled.) + +gcc/ + * doc/invoke.texi: Document SME. + * doc/sourcebuild.texi: Document aarch64_sve. + * config/aarch64/aarch64-option-extensions.def (sme): Define. + * config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro. + (TARGET_SME): Likewise. + * config/aarch64/aarch64.cc (aarch64_override_options_internal): + Ensure that SME is present when compiling streaming code. + +gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_aarch64_sme): New + target test. + * gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled + if it isn't by default. + * g++.target/aarch64/sme/aarch64-sme.exp: Likewise. + * gcc.target/aarch64/sme/streaming_mode_3.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 2 + + gcc/config/aarch64/aarch64.cc | 33 ++++++++++ + gcc/config/aarch64/aarch64.h | 5 ++ + gcc/doc/invoke.texi | 2 + + gcc/doc/sourcebuild.texi | 2 + + .../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++ + gcc/testsuite/lib/target-supports.exp | 12 ++++ + 10 files changed, 157 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index bdf4baf30..faee64a79 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") + + AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + ++AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") ++ + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 904166b21..8f8395201 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + return true; + } + ++/* Implement TARGET_START_CALL_ARGS. */ ++ ++static void ++aarch64_start_call_args (cumulative_args_t ca_v) ++{ ++ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); ++ ++ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON)) ++ { ++ error ("calling a streaming function requires the ISA extension %qs", ++ "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ } ++} ++ + /* This function is used by the call expanders of the machine description. + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". 
+@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts) + && !fixed_regsR18_REGNUM) + error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); + ++ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) ++ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) ++ { ++ error ("streaming functions require the ISA extension %qs", "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ auto new_flags = (opts->x_aarch64_asm_isa_flags ++ | feature_deps::SME ().enable); ++ aarch64_set_asm_isa_flags (opts, new_flags); ++ } ++ + initialize_aarch64_code_model (opts); + initialize_aarch64_tls_size (opts); + +@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_FUNCTION_VALUE_REGNO_P + #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p + ++#undef TARGET_START_CALL_ARGS ++#define TARGET_START_CALL_ARGS aarch64_start_call_args ++ + #undef TARGET_GIMPLE_FOLD_BUILTIN + #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 84215c8c3..dd2de4e88 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) ++#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) + #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) +@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ + #define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) + ++/* SME instructions, enabled through +sme. Note that this does not ++ imply anything about the state of PSTATE.SM. */ ++#define TARGET_SME (AARCH64_ISA_SME) ++ + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 53709b246..2420b05d9 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy}, + Enable the Flag Manipulation instructions Extension. + @item pauth + Enable the Pointer Authentication Extension. ++@item sme ++Enable the Scalable Matrix Extension. + + @end table + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 454fae11a..80936a0eb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian. + @item aarch64_small_fpic + Binutils installed on test system supports relocation types required by -fpic + for AArch64 small memory model. ++@item aarch64_sme ++AArch64 target that generates instructions for SME. + @item aarch64_sve_hw + AArch64 target that is able to generate and execute SVE code (regardless of + whether it does so by default). 
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +index 72fcd0bd9..1c3e69cde 100644 +--- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp ++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +@@ -30,10 +30,16 @@ load_lib g++-dg.exp + # Initialize `dg'. + dg-init + +-aarch64-with-arch-dg-options "" { ++if { check_effective_target_aarch64_sme } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++ ++aarch64-with-arch-dg-options $sme_flags { + # Main loop. + dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.\cCS\ \ +- "" "" ++ "" $sme_flags + } + + # All done. +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp +index c990e5924..011310e80 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp ++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp +@@ -30,10 +30,16 @@ load_lib gcc-dg.exp + # Initialize `dg'. + dg-init + +-aarch64-with-arch-dg-options "" { ++if { check_effective_target_aarch64_sme } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++
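The user-visible effect is easy to demonstrate (hypothetical invocation, using the diagnostics added by this patch): compiling a streaming function with plain -march=armv9-a should be rejected, while -march=armv9-a+sme accepts it:

/* sme-demo.c: without +sme this is rejected with
   "streaming functions require the ISA extension 'sme'".  */
void
needs_sme (void) __arm_streaming
{
}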
View file
_service:tar_scm:0175-LoongArch-Remove-unreachable-codes.patch
Added
@@ -0,0 +1,249 @@ +From a75609d90f506709e4af26701aa4fb6adce00700 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 15:00:40 +0800 +Subject: PATCH 175/188 LoongArch: Remove unreachable codes. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_split_move): Delete. + (loongarch_hard_regno_mode_ok_uncached): Likewise. + * config/loongarch/loongarch.md + (move_doubleword_fpr<mode>): Likewise. + (load_low<mode>): Likewise. + (load_high<mode>): Likewise. + (store_word<mode>): Likewise. + (movgr2frh<mode>): Likewise. + (movfrh2gr<mode>): Likewise. +--- + gcc/config/loongarch/loongarch.cc | 47 +++---------- + gcc/config/loongarch/loongarch.md | 109 ------------------------------ + 2 files changed, 8 insertions(+), 148 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b78512e0e..260dd7b5f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4459,42 +4459,13 @@ loongarch_split_move_p (rtx dest, rtx src) + void + loongarch_split_move (rtx dest, rtx src) + { +- rtx low_dest; +- + gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_256bit_move (dest, src); +- else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) +- { +- if (!TARGET_64BIT && GET_MODE (dest) == DImode) +- emit_insn (gen_move_doubleword_fprdi (dest, src)); +- else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) +- emit_insn (gen_move_doubleword_fprdf (dest, src)); +- else if (TARGET_64BIT && GET_MODE (dest) == TFmode) +- emit_insn (gen_move_doubleword_fprtf (dest, src)); +- else +- gcc_unreachable (); +- } + else +- { +- /* The operation can be split into two normal moves. Decide in +- which order to do them. */ +- low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) +- { +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- } +- else +- { +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- } +- } ++ gcc_unreachable (); + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -6743,20 +6714,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) ++ if (GP_REG_P (regno) ++ && !LSX_SUPPORTED_MODE_P (mode) + && !LASX_SUPPORTED_MODE_P (mode)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- + if (FP_REG_P (regno)) + { ++ /* Allow 128-bit or 256-bit vector modes in all FPR. 
*/ ++ if (LSX_SUPPORTED_MODE_P (mode) ++ || LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 55a759850..16f9f37c8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,9 +400,6 @@ + ;; 64-bit modes for which we provide move patterns. + (define_mode_iterator MOVE64 DI DF) + +-;; 128-bit modes for which we provide move patterns on 64-bit targets. +-(define_mode_iterator MOVE128 TI TF) +- + ;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT QI HI) + +@@ -421,12 +418,6 @@ + (define_mode_iterator ANYFI (SI "TARGET_HARD_FLOAT") + (DI "TARGET_DOUBLE_FLOAT")) + +-;; A mode for which moves involving FPRs may need to be split. +-(define_mode_iterator SPLITF +- (DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")) +- + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. + (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) +@@ -2421,41 +2412,6 @@ + (set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")) + +-;; Emit a doubleword move in which exactly one of the operands is +-;; a floating-point register. We can't just emit two normal moves +-;; because of the constraints imposed by the FPU register model; +-;; see loongarch_can_change_mode_class for details. Instead, we keep +-;; the FPR whole and use special patterns to refer to each word of +-;; the other operand. +- +-(define_expand "move_doubleword_fpr<mode>" +- (set (match_operand:SPLITF 0) +- (match_operand:SPLITF 1)) +- "" +-{ +- if (FP_REG_RTX_P (operands0)) +- { +- rtx low = loongarch_subword (operands1, 0); +- rtx high = loongarch_subword (operands1, 1); +- emit_insn (gen_load_low<mode> (operands0, low)); +- if (!TARGET_64BIT) +- emit_insn (gen_movgr2frh<mode> (operands0, high, operands0)); +- else +- emit_insn (gen_load_high<mode> (operands0, high, operands0)); +- } +- else +- { +- rtx low = loongarch_subword (operands0, 0); +- rtx high = loongarch_subword (operands0, 1); +- emit_insn (gen_store_word<mode> (low, operands1, const0_rtx)); +- if (!TARGET_64BIT) +- emit_insn (gen_movfrh2gr<mode> (high, operands1)); +- else +- emit_insn (gen_store_word<mode> (high, operands1, const1_rtx)); +- } +- DONE; +-}) +- + ;; Clear one FCC register + + (define_expand "movfcc" +@@ -2742,49 +2698,6 @@ + (set_attr "type" "fcvt") + (set_attr "mode" "<ANYF:MODE>")) + +-;; Load the low word of operand 0 with operand 1. +-(define_insn "load_low<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- UNSPEC_LOAD_LOW)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 0); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +- +-;; Load the high word of operand 0 from operand 1, preserving the value +-;; in the low word. 
+-(define_insn "load_high<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- (match_operand:SPLITF 2 "register_operand" "0,0") +- UNSPEC_LOAD_HIGH)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 1); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +-
View file
_service:tar_scm:0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
Added
@@ -0,0 +1,168 @@
+From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
+From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+Date: Thu, 1 Jun 2023 09:37:06 +0100
+Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
+ alternatives to 64-bit vector move patterns
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
+
+We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives
+to the mov patterns.  This straightforward patch does that and for the pair variants too.
+For the testcase in the code we now generate the optimal assembly without any superfluous
+GP<->SIMD moves.
+
+Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
+
+gcc/ChangeLog:
+
+	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
+	Add =r,m and =m,r alternatives.
+	(load_pair<DREG:mode><DREG2:mode>): Likewise.
+	(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
+---
+ gcc/config/aarch64/aarch64-simd.md            | 40 ++++++++++--------
+ .../gcc.target/aarch64/xreg-vec-modes_1.c     | 42 +++++++++++++++++++
+ 2 files changed, 65 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index 2d688edf5..b5c52ba16 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -116,26 +116,28 @@
+ 
+ (define_insn "*aarch64_simd_mov<VDMOV:mode>"
+   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
+-		"=w, m, m, w, ?r, ?w, ?r, w, w")
++		"=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
+	(match_operand:VDMOV 1 "general_operand"
+-		"m, Dz, w, w, w, r, r, Dn, Dz"))]
++		"m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
+   "TARGET_FLOAT
+    && (register_operand (operands[0], <MODE>mode)
+        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
+   "@
+    ldr\t%d0, %1
++   ldr\t%x0, %1
+    str\txzr, %0
+    str\t%d1, %0
++   str\t%x1, %0
+    * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
+    * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
+    fmov\t%d0, %1
+    mov\t%0, %1
+    * return aarch64_output_simd_mov_immediate (operands[1], 64);
+    fmov\t%d0, xzr"
+-  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
+-		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
++  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
++		     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
+		     mov_reg, neon_move<q>, f_mcr")
+-   (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
++   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
+ )
+ 
+ (define_insn "*aarch64_simd_mov<VQMOV:mode>"
+@@ -177,31 +179,35 @@
+ )
+ 
+ (define_insn "load_pair<DREG:mode><DREG2:mode>"
+-  [(set (match_operand:DREG 0 "register_operand" "=w")
+-	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+-   (set (match_operand:DREG2 2 "register_operand" "=w")
+-	(match_operand:DREG2 3 "memory_operand" "m"))]
++  [(set (match_operand:DREG 0 "register_operand" "=w,r")
++	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
++   (set (match_operand:DREG2 2 "register_operand" "=w,r")
++	(match_operand:DREG2 3 "memory_operand" "m,m"))]
+   "TARGET_FLOAT
+    && rtx_equal_p (XEXP (operands[3], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[1], 0),
+				  GET_MODE_SIZE (<DREG:MODE>mode)))"
+-  "ldp\\t%d0, %d2, %z1"
+-  [(set_attr "type" "neon_ldp")]
++  "@
++   ldp\t%d0, %d2, %z1
++   ldp\t%x0, %x2, %z1"
++  [(set_attr "type" "neon_ldp,load_16")]
+ )
+ 
+ (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+-  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+-	(match_operand:DREG 1 "register_operand" "w"))
+-   (set (match_operand:DREG2 2 "memory_operand" "=m")
+-	(match_operand:DREG2 3 "register_operand" "w"))]
++  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
++	(match_operand:DREG 1 "register_operand" "w,r"))
++   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
++	(match_operand:DREG2 3 "register_operand" "w,r"))]
+   "TARGET_FLOAT
+    && rtx_equal_p (XEXP (operands[2], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[0], 0),
+				  GET_MODE_SIZE (<DREG:MODE>mode)))"
+-  "stp\\t%d1, %d3, %z0"
+-  [(set_attr "type" "neon_stp")]
++  "@
++   stp\t%d1, %d3, %z0
++   stp\t%x1, %x3, %z0"
++  [(set_attr "type" "neon_stp,store_16")]
+ )
+ 
+ (define_insn "load_pair<VQ:mode><VQ2:mode>"
+diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+new file mode 100644
+index 000000000..fc4dcb1ad
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++typedef unsigned int v2si __attribute__((vector_size (8)));
++
++#define force_gp(V1) asm volatile ("" \
++			  : "=r"(V1) \
++			  : "r"(V1) \
++			  : /* No clobbers */);
++
++/*
++** foo:
++**	ldr	(x[0-9]+), \[x1\]
++**	str	\1, \[x0\]
++**	ret
++*/
++
++void
++foo (v2si *a, v2si *b)
++{
++  v2si tmp = *b;
++  force_gp (tmp);
++  *a = tmp;
++}
++
++/*
++** foo2:
++**	ldp	(x[0-9]+), (x[0-9]+), \[x0\]
++**	stp	\1, \2, \[x1\]
++**	ret
++*/
++void
++foo2 (v2si *a, v2si *b)
++{
++  v2si t1 = *a;
++  v2si t2 = a[1];
++  force_gp (t1);
++  force_gp (t2);
++  *b = t1;
++  b[1] = t2;
++}
+-- 
+2.33.0
+
_service:tar_scm:0176-LoongArch-Organize-the-code-related-to-split-move-an.patch
Added
@@ -0,0 +1,413 @@ +From 95089699271d235efc29ae48b78f8c7f1b6386c4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 12 Jul 2024 09:57:40 +0800 +Subject: PATCH 176/188 LoongArch: Organize the code related to split move + and merge the same functions. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_split_128bit_move): Delete. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_vector_move): Add a function declaration. + * config/loongarch/loongarch.cc + (loongarch_vector_costs::finish_cost): Adjust the code + formatting. + (loongarch_split_vector_move_p): Merge + loongarch_split_128bit_move_p and loongarch_split_256bit_move_p. + (loongarch_split_move_p): Merge code. + (loongarch_split_move): Likewise. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_128bit_move): Delete. + (loongarch_split_vector_move): Merge loongarch_split_128bit_move + and loongarch_split_256bit_move. + (loongarch_split_256bit_move): Delete. + (loongarch_global_init): Remove the extra semicolon at the + end of the function. + * config/loongarch/loongarch.md (*movdf_softfloat): Added a new + condition TARGET_64BIT. +--- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/loongarch.cc | 221 ++++++------------------ + gcc/config/loongarch/loongarch.md | 1 + + 3 files changed, 58 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 0c31a74b7..abf1a0893 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern void loongarch_split_128bit_move (rtx, rtx); +-extern bool loongarch_split_128bit_move_p (rtx, rtx); +-extern void loongarch_split_256bit_move (rtx, rtx); +-extern bool loongarch_split_256bit_move_p (rtx, rtx); ++extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 260dd7b5f..53bd8d7ec 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4351,10 +4351,10 @@ void + loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); ++ + if (loop_vinfo) +- { +- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); +- } ++ m_suggested_unroll_factor ++ = determine_suggested_unroll_factor (loop_vinfo); + + vector_costs::finish_cost (scalar_costs); + } +@@ -4420,6 +4420,7 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + ++static bool loongarch_split_vector_move_p (rtx dest, rtx src); + /* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. */ + +@@ -4441,13 +4442,11 @@ loongarch_split_move_p (rtx dest, rtx src) + return false; + } + +- /* Check if LSX moves need splitting. 
*/ +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_128bit_move_p (dest, src); + +- /* Check if LASX moves need splitting. */ +- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_256bit_move_p (dest, src); ++ /* Check if vector moves need splitting. */ ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ return loongarch_split_vector_move_p (dest, src); + + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +@@ -4460,10 +4459,9 @@ void + loongarch_split_move (rtx dest, rtx src) + { + gcc_checking_assert (loongarch_split_move_p (dest, src)); +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_128bit_move (dest, src); +- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ loongarch_split_vector_move (dest, src); + else + gcc_unreachable (); + } +@@ -4585,224 +4583,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) + + return insnldrindex-2; + } +-/* Return true if a 128-bit move from SRC to DEST should be split. */ +- +-bool +-loongarch_split_128bit_move_p (rtx dest, rtx src) +-{ +- /* LSX-to-LSX moves can be done in a single instruction. */ +- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) +- return false; +- +- /* Check for LSX loads and stores. */ +- if (FP_REG_RTX_P (dest) && MEM_P (src)) +- return false; +- if (FP_REG_RTX_P (src) && MEM_P (dest)) +- return false; +- +- /* Check for LSX set to an immediate const vector with valid replicated +- element. */ +- if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) +- return false; +- +- /* Check for LSX load zero immediate. */ +- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) +- return false; +- +- return true; +-} +- +-/* Return true if a 256-bit move from SRC to DEST should be split. */ ++/* Return true if a vector move from SRC to DEST should be split. */ + +-bool +-loongarch_split_256bit_move_p (rtx dest, rtx src) ++static bool ++loongarch_split_vector_move_p (rtx dest, rtx src) + { +- /* LSX-to-LSX moves can be done in a single instruction. */ ++ /* Vector moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + +- /* Check for LSX loads and stores. */ ++ /* Check for vector loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + +- /* Check for LSX set to an immediate const vector with valid replicated ++ /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + +- /* Check for LSX load zero immediate. */ ++ /* Check for vector load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; + } + +-/* Split a 128-bit move from SRC to DEST. */ ++/* Split a vector move from SRC to DEST. 
*/ + + void +-loongarch_split_128bit_move (rtx dest, rtx src) ++loongarch_split_vector_move (rtx dest, rtx src) + { + int byte, index; +- rtx low_dest, low_src, d, s; ++ rtx s, d; ++ machine_mode mode = GET_MODE (dest); ++ bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + +- rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V4SImode)
_service:tar_scm:0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
Added
@@ -0,0 +1,167 @@ +From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Wed, 18 Oct 2023 09:34:01 +0100 +Subject: PATCH 078/157 BackportSME AArch64: Rewrite simd move immediate + patterns to new syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa + +This rewrites the simd MOV patterns to use the new compact syntax. +No change in semantics is expected. This will be needed in follow on patches. + +This also merges the splits into the define_insn which will also be needed soon. + +gcc/ChangeLog: + + PR tree-optimization/109154 + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Rewrite to new syntax. + (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in + splits. +--- + gcc/config/aarch64/aarch64-simd.md | 116 ++++++++++++----------------- + 1 file changed, 47 insertions(+), 69 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index b5c52ba16..1f4b30642 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -115,54 +115,59 @@ + ) + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" +- (set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VDMOV 1 "general_operand" +- "m, m, Dz, w, r, w, w, r, r, Dn, Dz")) ++ (set (match_operand:VDMOV 0 "nonimmediate_operand") ++ (match_operand:VDMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%d0, %1 +- ldr\t%x0, %1 +- str\txzr, %0 +- str\t%d1, %0 +- str\t%x1, %0 +- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; +- * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; +- fmov\t%d0, %1 +- mov\t%0, %1 +- * return aarch64_output_simd_mov_immediate (operands1, 64); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\ +- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\ +- mov_reg, neon_move<q>, f_mcr") +- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*") +-) +- +-(define_insn "*aarch64_simd_mov<VQMOV:mode>" +- (set (match_operand:VQMOV 0 "nonimmediate_operand" +- "=w, Umn, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VQMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn, Dz")) ++ {@ cons: =0, 1; attrs: type, arch ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr ++ } ++) ++ ++(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" ++ (set (match_operand:VQMOV 0 "nonimmediate_operand") ++ (match_operand:VQMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%q0, %1 +- stp\txzr, xzr, %0 +- str\t%q1, %0 +- mov\t%0.<Vbtype>, %1.<Vbtype> +- # +- # +- # +- * return aarch64_output_simd_mov_immediate (operands1, 128); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ +- neon_logic<q>, multiple, multiple,\ +- multiple, neon_move<q>, fmov") +- (set_attr "length" "4,4,4,4,8,8,8,4,4") +- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*") ++ {@ cons: =0, 1; attrs: type, arch, length ++ w , m ; neon_load1_1reg<q> , * , 4 ldr\t%q0, %1 ++ Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 ++ m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 ++ w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ ?r , w ; multiple , * , 8 # ++ ?w , r ; multiple , * , 8 # ++ ?r , r ; multiple , * , 8 # ++ w , Dn; neon_move<q> , simd, 4 << aarch64_output_simd_mov_immediate (operands1, 128); ++ w , Dz; fmov , * , 4 fmov\t%d0, xzr ++ } ++ "&& reload_completed ++ && (REG_P (operands0) ++ && REG_P (operands1) ++ && !(FP_REGNUM_P (REGNO (operands0)) ++ && FP_REGNUM_P (REGNO (operands1))))" ++ (const_int 0) ++ { ++ if (GP_REGNUM_P (REGNO (operands0)) ++ && GP_REGNUM_P (REGNO (operands1))) ++ aarch64_simd_emit_reg_reg_move (operands, DImode, 2); ++ else ++ aarch64_split_simd_move (operands0, operands1); ++ DONE; ++ } + ) + + ;; When storing lane zero we can use the normal STR and its more permissive +@@ -238,33 +243,6 @@ + (set_attr "type" "neon_stp_q") + ) + +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && GP_REGNUM_P (REGNO (operands0)) +- && GP_REGNUM_P (REGNO (operands1))" +- (const_int 0) +-{ +- aarch64_simd_emit_reg_reg_move (operands, DImode, 2); +- DONE; +-}) +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && ((FP_REGNUM_P (REGNO (operands0)) && GP_REGNUM_P (REGNO (operands1))) +- || (GP_REGNUM_P (REGNO 
(operands0)) && FP_REGNUM_P (REGNO (operands1))))" +- (const_int 0) +-{ +- aarch64_split_simd_move (operands0, operands1); +- DONE; +-}) +- + (define_expand "@aarch64_split_simd_mov<mode>" + (set (match_operand:VQMOV 0) + (match_operand:VQMOV 1)) +-- +2.33.0 +
_service:tar_scm:0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch
Added
@@ -0,0 +1,364 @@ +From 34c8e935780d43a797e403ca6604115ec393f0e6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 20 Jul 2024 20:38:13 +0800 +Subject: PATCH 177/188 LoongArch: Expand some SImode operations through + "si3_extend" instructions if TARGET_64BIT + +We already had "si3_extend" insns and we hoped the fwprop or combine +passes can use them to remove unnecessary sign extensions. But this +does not always work: for cases like x << 1 | y, the compiler +tends to do + + (sign_extend:DI + (ior:SI (ashift:SI (reg:SI $r4) + (const_int 1)) + (reg:SI $r5))) + +instead of + + (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) + (sign_extend:DI (reg:SI $r5))) + +So we cannot match the ashlsi3_extend instruction here and we get: + + slli.w $r4,$r4,1 + or $r4,$r5,$r4 + slli.w $r4,$r4,0 # <= redundant + jr $r1 + +To eliminate this redundant extension we need to turn SImode shift etc. +to DImode "si3_extend" operations earlier, when we expand the SImode +operation. We are already doing this for addition, now do it for +shifts, rotates, substract, multiplication, division, and modulo as +well. + +The bytepick.w definition for TARGET_64BIT needs to be adjusted so it +won't be undone by the shift expanding. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). + (<optab:any_shift><mode>3, <optab:any_div><mode>3, + sub<mode>3, rotr<mode>3, mul<mode>3): Add a "*" to the insn name + so we can redefine the names with define_expand. + (*<optab:any_shift>si3_extend): Remove "*" so we can use them + in expanders. + (*subsi3_extended, *mulsi3_extended): Likewise, also remove the + trailing "ed" for consistency. + (*<optab:any_div>si3_extended): Add mode for sign_extend to + prevent an ICE using it in expanders. + (shift_w, arith_w): New define_code_iterator. + (<optab:any_w><mode>3): New define_expand. Expand with + <optab:any_w>si3_extend for SImode if TARGET_64BIT. + (<optab:arith_w><mode>3): Likewise. + (mul<mode>3): Expand to mulsi3_extended for SImode if + TARGET_64BIT and ISA_HAS_DIV32. + (<optab:any_div><mode>3): Expand to <optab:any_div>si3_extended + for SImode if TARGET_64BIT. + (rotl<mode>3): Expand to rotrsi3_extend for SImode if + TARGET_64BIT. + (bytepick_w_<bytepick_imm>): Add mode for lshiftrt and ashift. + (bitsize, bytepick_imm, bytepick_w_ashift_amount): New + define_mode_attr. + (bytepick_w_<bytepick_imm>_extend): Adjust for the RTL change + caused by 32-bit shift expanding. Now bytepick_imm only covers + 2 and 3, separate one remaining case to ... + (bytepick_w_1_extend): ... here, new define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise_extend.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 131 +++++++++++++++--- + .../gcc.target/loongarch/bitwise_extend.c | 45 ++++++ + 2 files changed, 154 insertions(+), 22 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 8bcb43042..6915dab0e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -546,6 +546,7 @@ + (define_code_attr optab (ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") ++ (rotatert "rotr") + (ior "ior") + (xor "xor") + (and "and") +@@ -624,6 +625,49 @@ + (48 "6") + (56 "7")) + ++;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT ++;; so the redundant sign extension can be removed if the output is used as ++;; an input of a bitwise operation. Note plus, rotl, and div are handled ++;; separately. ++(define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator arith_w minus mult) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:SI 2 "arith_operand" "rI"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "register_operand" "r"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ + ;; + ;; .................... 
+ ;; +@@ -781,7 +825,7 @@ + (set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")) + +-(define_insn "sub<mode>3" ++(define_insn "*sub<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -791,7 +835,7 @@ + (set_attr "mode" "<MODE>")) + + +-(define_insn "*subsi3_extended" ++(define_insn "subsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") +@@ -818,7 +862,7 @@ + (set_attr "type" "fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "mul<mode>3" ++(define_insn "*mul<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -827,7 +871,7 @@ + (set_attr "type" "imul") + (set_attr "mode" "<MODE>")) + +-(define_insn "*mulsi3_extended" ++(define_insn "mulsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") +@@ -1001,8 +1045,19 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT) + { ++ if (ISA_HAS_DIV32) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extended (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++ + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx rd = gen_reg_rtx (DImode); +@@ -1038,7 +1093,7 @@ + + (define_insn "<optab>si3_extended" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r")
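To make the commit message above concrete, the problematic shape is a 32-bit shift feeding a bitwise operation; a hedged reproduction (the before/after assembly is quoted from the commit text, not re-verified):

/* Before this patch (LoongArch64, -O2) the SImode shift-or ended with a
   redundant sign extension:
       slli.w  $r4,$r4,1
       or      $r4,$r5,$r4
       slli.w  $r4,$r4,0   # redundant
       jr      $r1
   After it, the shift expands through the "si3_extend" pattern, the result
   is already known to be sign-extended, and the extra slli.w disappears.  */
int
shift_or (int x, int y)
{
  return x << 1 | y;
}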
_service:tar_scm:0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
Added
@@ -0,0 +1,34 @@
+From 883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Tue, 20 Jun 2023 08:54:42 +0100
+Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from
+ *mov<mode>_aarch64
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a
+
+I accidentally left a test comment in the final version of the patch.
+This removes the comment.
+
+gcc/ChangeLog:
+
+    * config/aarch64/aarch64.md (*mov<mode>_aarch64): Drop test comment.
+---
+ gcc/config/aarch64/aarch64.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 29a665e45..1ec23fae8 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1213,7 +1213,7 @@
+     [m, r Z ; store_4        , *     ] str<size>\\t%w1, %0
+     [m, w   ; store_4        , *     ] str\t%<size>1, %0
+     [r, w   ; neon_to_gp<q>  , simd  ] umov\t%w0, %1.<v>[0]
+-    [r, w   ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1 /*foo */
++    [r, w   ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1
+     [w, r Z ; neon_from_gp<q>, simd  ] dup\t%0.<Vallxd>, %w1
+     [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
+     [w, w   ; neon_dup       , simd  ] dup\t%<Vetype>0, %1.<v>[0]
+--
+2.33.0
+
_service:tar_scm:0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch
Added
@@ -0,0 +1,123 @@ +From 8311e0053c8a9646b8798c53ae4a8f45d12c42c1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 28 Jul 2024 17:02:49 +0800 +Subject: PATCH 178/188 LoongArch: Relax ins_zero_bitmask_operand and remove + and<mode>3_align + +In r15-1207 I was too stupid to realize we just need to relax +ins_zero_bitmask_operand to allow using bstrins for aligning, instead of +adding a new split. And, "> 12" in ins_zero_bitmask_operand also makes +no sense: it rejects bstrins for things like "x & ~4l" with no good +reason. + +So fix my errors now. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Cover more cases that bstrins can benefit. + (high_bitmask_operand): Remove. + * config/loongarch/constraints.md (Yy): Remove. + * config/loongarch/loongarch.md (and<mode>3_align): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-4.c: New test. +--- + gcc/config/loongarch/constraints.md | 4 ---- + gcc/config/loongarch/loongarch.md | 17 ----------------- + gcc/config/loongarch/predicates.md | 9 ++------- + gcc/testsuite/gcc.target/loongarch/bstrins-4.c | 9 +++++++++ + 4 files changed, 11 insertions(+), 28 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index f3f5776da..d653ea82a 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,10 +292,6 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + +-(define_constraint "Yy" +- "@internal" +- (match_operand 0 "high_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6915dab0e..1ebcfa0c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1588,23 +1588,6 @@ + (set_attr "move_type" "pick_ins") + (set_attr "mode" "<MODE>")) + +-(define_insn_and_split "and<mode>3_align" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "high_bitmask_operand" "Yy"))) +- "" +- "#" +- "" +- (set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) +- (const_int 0)) +-{ +- int len; +- +- len = low_bitmask_len (<MODE>mode, ~INTVAL (operands2)); +- operands2 = GEN_INT (len); +-}) +- + (define_insn_and_split "*bstrins_<mode>_for_mask" + (set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 95be8a4fe..2b7f7ed47 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,10 +293,6 @@ + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "high_bitmask_operand" +- (and (match_code "const_int") +- (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) +- + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +@@ -406,11 +402,10 @@ + + (define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) != -1") +- (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ +- > 12"))) ++ > 0") ++ (not (match_operand 0 
"const_uns_arith_operand")))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +new file mode 100644 +index 000000000..0823cfc38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */ ++ ++long ++x (long a) ++{ ++ return a & ~4; ++} +-- +2.43.0 +
_service:tar_scm:0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
Added
@@ -0,0 +1,1552 @@ +From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: PATCH 080/157 BackportSME aarch64: Distinguish + streaming-compatible AdvSIMD insns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac + +The vast majority of Advanced SIMD instructions are not +available in streaming mode, but some of the load/store/move +instructions are. This patch adds a new target feature macro +called TARGET_BASE_SIMD for this streaming-compatible subset. + +The vector-to-vector move instructions are not streaming-compatible, +so we need to use the SVE move instructions where enabled, or fall +back to the nofp16 handling otherwise. + +I haven't found a good way of testing the SVE EXT alternative +in aarch64_simd_mov_from_<mode>high, but I'd rather provide it +than not. + +gcc/ + * config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro. + (TARGET_SIMD): Require PSTATE.SM to be 0. + (AARCH64_ISA_SM_OFF): New macro. + * config/aarch64/aarch64.cc (aarch64_array_mode_supported_p): + Allow Advanced SIMD structure modes for TARGET_BASE_SIMD. + (aarch64_print_operand): Support '%Z'. + (aarch64_secondary_reload): Expect SVE moves to be used for + Advanced SIMD modes if SVE is enabled and non-streaming + Advanced SIMD isn't. + (aarch64_register_move_cost): Likewise. + (aarch64_simd_container_mode): Extend Advanced SIMD mode + handling to TARGET_BASE_SIMD. + (aarch64_expand_cpymem): Expand commentary. + * config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd. + (arch_enabled): Handle it. + (*mov<mode>_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD. + (*movti_aarch64): Use an SVE move instruction if non-streaming + SIMD isn't available. + (*mov<TFD:mode>_aarch64): Likewise. + (load_pair_dw_tftf): Extend to TARGET_BASE_SIMD. + (store_pair_dw_tftf): Likewise. + (loadwb_pair<TX:mode>_<P:mode>): Likewise. + (storewb_pair<TX:mode>_<P:mode>): Likewise. + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Allow UMOV in streaming mode. + (*aarch64_simd_mov<VQMOV:mode>): Use an SVE move instruction + if non-streaming SIMD isn't available. + (aarch64_store_lane0<mode>): Depend on TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_simd_mov_from_<mode>low): Likewise. Use fmov if + Advanced SIMD is completely disabled. + (aarch64_simd_mov_from_<mode>high): Use SVE EXT instructions if + non-streaming SIMD isn't available. + +gcc/testsuite/ + * gcc.target/aarch64/movdf_2.c: New test. + * gcc.target/aarch64/movdi_3.c: Likewise. + * gcc.target/aarch64/movhf_2.c: Likewise. + * gcc.target/aarch64/movhi_2.c: Likewise. + * gcc.target/aarch64/movqi_2.c: Likewise. + * gcc.target/aarch64/movsf_2.c: Likewise. + * gcc.target/aarch64/movsi_2.c: Likewise. + * gcc.target/aarch64/movtf_3.c: Likewise. + * gcc.target/aarch64/movtf_4.c: Likewise. + * gcc.target/aarch64/movti_3.c: Likewise. + * gcc.target/aarch64/movti_4.c: Likewise. + * gcc.target/aarch64/movv16qi_4.c: Likewise. + * gcc.target/aarch64/movv16qi_5.c: Likewise. + * gcc.target/aarch64/movv8qi_4.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_1.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_2.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_3.c: Likewise. 
+--- + gcc/config/aarch64/aarch64-simd.md | 50 ++++++----- + gcc/config/aarch64/aarch64.cc | 16 ++-- + gcc/config/aarch64/aarch64.h | 12 ++- + gcc/config/aarch64/aarch64.md | 77 +++++++++-------- + gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++ + .../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++ + .../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++ + .../gcc.target/aarch64/sme/arm_neon_3.c | 11 +++ + 21 files changed, 1062 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 1f4b30642..62493cdfa 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -121,19 +121,19 @@ + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + {@ cons: =0, 1; attrs: type, arch +- w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 +- r , m ; load_8 , * ldr\t%x0, %1 +- m , Dz; store_8 , * str\txzr, %0 +- m , w ; neon_store1_1reg<q>, * str\t%d1, %0 +- m , r ; store_8 , * str\t%x1, %0 +- w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> +- w , w ; neon_logic<q> , * fmov\t%d0, %d1 +- ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 +- ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 +- ?w, r ; f_mcr , * fmov\t%d0, %1 +- ?r, r ; mov_reg , * mov\t%0, %1 +- w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); +- w , Dz; f_mcr , * fmov\t%d0, xzr ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; 
neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , base_simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr + } + ) + +@@ -148,6 +148,7 @@ + Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 + m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 + w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; * , sve , 4 mov\t%Z0.d, %Z1.d + ?r , w ; multiple , * , 8 # + ?w , r ; multiple , * , 8 # + ?r , r ; multiple , * , 8 # +@@ -177,7 +178,7 @@ + (set (match_operand:<VEL> 0 "memory_operand" "=m") + (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") + (parallel (match_operand 2 "const_int_operand" "n")))) +- "TARGET_SIMD ++ "TARGET_FLOAT + && ENDIAN_LANE_N (<nunits>, INTVAL (operands2)) == 0" + "str\\t%<Vetype>1, %0" + (set_attr "type" "neon_store1_1reg<q>") +@@ -312,35 +313,38 @@ + ) + + (define_insn_and_split "aarch64_simd_mov_from_<mode>low" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r") ++ (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" ""))) +- "TARGET_SIMD" ++ "TARGET_FLOAT" + "@ + # +- umov\t%0, %1.d0" ++ umov\t%0, %1.d0 ++ fmov\t%0, %d1" + "&& reload_completed && aarch64_simd_register (operands0, <VHALF>mode)" + (set (match_dup 0) (match_dup 1)) + { + operands1 = aarch64_replace_reg_mode (operands1, <VHALF>mode); + } +- (set_attr "type" "mov_reg,neon_to_gp<q>") ++ (set_attr "type" "mov_reg,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,base_simd,*") + (set_attr "length" "4") + ) + + (define_insn "aarch64_simd_mov_from_<mode>high" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
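For background on the base_simd/simd split above: inside a streaming-compatible function only the load/store/move subset of Advanced SIMD is legal, so the compiler must pick the base-SIMD or GPR alternatives added here. A heavily hedged sketch using the ACLE keyword from this backport series (whether this exact form compiles depends on how much of the SME series is applied):

/* __arm_streaming_compatible marks a function callable with PSTATE.SM
   either on or off; plain Q-register ldr/str remain legal there, while
   most other Advanced SIMD instructions do not.  Assumption: the SME
   ACLE keyword support from this patch series is enabled.  */
typedef unsigned char v16qi __attribute__ ((vector_size (16)));

void
copy_v16qi (v16qi *dst, v16qi *src) __arm_streaming_compatible
{
  *dst = *src;  /* Expected (hedged): ldr q<N>, [x1]; str q<N>, [x0].  */
}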
_service:tar_scm:0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch
Added
@@ -0,0 +1,224 @@ +From 54bf8fc616af5cdb9e4c787a2dfb2c516c8e425a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 28 Jul 2024 19:57:02 +0800 +Subject: PATCH 179/188 LoongArch: Rework bswap{hi,si,di}2 definition + +Per a gcc-help thread we are generating sub-optimal code for +__builtin_bswap{32,64}. To fix it: + +- Use a single revb.d instruction for bswapdi2. +- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT, + revb.2h + rotri.w for !TARGET_64BIT. +- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single + revb.2w instruction for bswapdi2 (x) r>> 32. + +Unfortunately I cannot figure out a way to make the compiler generate +revb.4h or revh.{2w,d} instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H, + UNSPEC_REVH_D): Remove UNSPECs. + (revb_4h, revh_d): Remove define_insn. + (revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of + an UNSPEC. + (revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn. + (bswapsi2): Change to define_expand. Only expand to revb.2h + + rotri.w if !TARGET_64BIT. + (bswapdi2): Change to define_insn of which the output is just a + revb.d instruction. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/revb.c: New test. +--- + gcc/config/loongarch/loongarch.md | 79 ++++++++++++----------- + gcc/testsuite/gcc.target/loongarch/revb.c | 61 +++++++++++++++++ + 2 files changed, 104 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 1ebcfa0c7..b1c828dba 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -20,11 +20,6 @@ + ;; <http://www.gnu.org/licenses/>. + + (define_c_enum "unspec" +- ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_REVB_2H +- UNSPEC_REVB_4H +- UNSPEC_REVH_D +- + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH +@@ -3151,55 +3146,67 @@ + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
+ +-(define_insn "bswaphi2" +- (set (match_operand:HI 0 "register_operand" "=r") +- (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++(define_insn "revb_2h" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16))) + "" + "revb.2h\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn_and_split "bswapsi2" +- (set (match_operand:SI 0 "register_operand" "=r") +- (bswap:SI (match_operand:SI 1 "register_operand" "r"))) +- "" +- "#" +- "" +- (set (match_dup 0) (unspec:SI (match_dup 1) UNSPEC_REVB_2H)) +- (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16))) +- "" +- (set_attr "insn_count" "2")) +- +-(define_insn_and_split "bswapdi2" ++(define_insn "revb_2h_extend" + (set (match_operand:DI 0 "register_operand" "=r") +- (bswap:DI (match_operand:DI 1 "register_operand" "r"))) ++ (sign_extend:DI ++ (rotatert:SI ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16)))) + "TARGET_64BIT" +- "#" +- "" +- (set (match_dup 0) (unspec:DI (match_dup 1) UNSPEC_REVB_4H)) +- (set (match_dup 0) (unspec:DI (match_dup 0) UNSPEC_REVH_D)) +- "" +- (set_attr "insn_count" "2")) ++ "revb.2h\t%0,%1" ++ (set_attr "type" "shift")) + +-(define_insn "revb_2h" +- (set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI (match_operand:SI 1 "register_operand" "r") UNSPEC_REVB_2H)) ++(define_insn "bswaphi2" ++ (set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) + "" + "revb.2h\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn "revb_4h" ++(define_insn "revb_2w" + (set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI (match_operand:DI 1 "register_operand" "r") UNSPEC_REVB_4H)) ++ (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r")) ++ (const_int 32))) + "TARGET_64BIT" +- "revb.4h\t%0,%1" ++ "revb.2w\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn "revh_d" ++(define_insn "*bswapsi2" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r"))) ++ "TARGET_64BIT" ++ "revb.2w\t%0,%1" ++ (set_attr "type" "shift")) ++ ++(define_expand "bswapsi2" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r"))) ++ "" ++{ ++ if (!TARGET_64BIT) ++ { ++ rtx t = gen_reg_rtx (SImode); ++ emit_insn (gen_revb_2h (t, operands1)); ++ emit_insn (gen_rotrsi3 (operands0, t, GEN_INT (16))); ++ DONE; ++ } ++}) ++ ++(define_insn "bswapdi2" + (set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI (match_operand:DI 1 "register_operand" "r") UNSPEC_REVH_D)) ++ (bswap:DI (match_operand:DI 1 "register_operand" "r"))) + "TARGET_64BIT" +- "revh.d\t%0,%1" ++ "revb.d\t%0,%1" + (set_attr "type" "shift")) ++ +  + ;; + ;; .................... 
+diff --git a/gcc/testsuite/gcc.target/loongarch/revb.c b/gcc/testsuite/gcc.target/loongarch/revb.c +new file mode 100644 +index 000000000..27a5d0fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/revb.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t1: ++** revb.2w \$r4,\$r4 ++** slli.w \$r4,\$r4,0 ++** jr \$r1 ++*/ ++unsigned int ++t1 (unsigned int x) ++{ ++ return __builtin_bswap32 (x); ++} ++ ++/* ++**t2: ++** revb.d \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t2 (unsigned long x) ++{ ++ return __builtin_bswap64 (x); ++} ++ ++/* ++**t3: ++** revb.2h \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned int ++t3 (unsigned int x) ++{ ++ return (x >> 8) & 0xff00ff | (x << 8) & 0xff00ff00; ++} ++ ++/*
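A hedged illustration of the last mapping listed above, "bswapdi2 (x) r>> 32" collapsing to a single revb.2w (the assembly follows the commit message and the new revb.c test rather than a fresh verification):

/* A 64-bit byte swap followed by a 32-bit rotate matches the new revb_2w
   pattern, so the whole function should become:
       revb.2w  $r4,$r4
       jr       $r1                                                      */
unsigned long
bswap_rot32 (unsigned long x)
{
  unsigned long y = __builtin_bswap64 (x);
  return (y >> 32) | (y << 32);
}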
_service:tar_scm:0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
Added
@@ -0,0 +1,4506 @@ +From 0404dfa43633a35460aba1b96d04f62cc7d6103b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: PATCH 081/157 BackportSME aarch64: Mark relevant SVE + instructions as non-streaming + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=983b4365028e9a059b5fb1eef85a297bea19fc8e + +Following on from the previous Advanced SIMD patch, this one +divides SVE instructions into non-streaming and streaming- +compatible groups. + +gcc/ + * config/aarch64/aarch64.h (TARGET_NON_STREAMING): New macro. + (TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Use it. + (TARGET_SVE2_SHA3, TARGET_SVE2_SM4): Likewise. + * config/aarch64/aarch64-sve-builtins-base.def: Separate out + the functions that require PSTATE.SM to be 0 and guard them + with AARCH64_FL_SM_OFF. + * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. + * config/aarch64/aarch64-sve-builtins.cc (check_required_extensions): + Enforce AARCH64_FL_SM_OFF requirements. + * config/aarch64/aarch64-sve.md (aarch64_wrffr): Require + TARGET_NON_STREAMING + (aarch64_rdffr, aarch64_rdffr_z, *aarch64_rdffr_z_ptest): Likewise. + (*aarch64_rdffr_ptest, *aarch64_rdffr_z_cc, *aarch64_rdffr_cc) + (@aarch64_ld<fn>f1<mode>): Likewise. + (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>) + (gather_load<mode><v_int_container>): Likewise + (mask_gather_load<mode><v_int_container>): Likewise. + (mask_gather_load<mode><v_int_container>): Likewise. + (*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked): Likewise. + (*mask_gather_load<mode><v_int_container>_sxtw): Likewise. + (*mask_gather_load<mode><v_int_container>_uxtw): Likewise. + (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>) + (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>): Likewise. + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked) + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_sxtw): Likewise. + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_uxtw): Likewise. + (@aarch64_ldff1_gather<mode>, @aarch64_ldff1_gather<mode>): Likewise. + (*aarch64_ldff1_gather<mode>_sxtw): Likewise. + (*aarch64_ldff1_gather<mode>_uxtw): Likewise. + (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode> + <VNx4_NARROW:mode>): Likewise. + (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>): Likewise. + (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>_sxtw): Likewise. + (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>_uxtw): Likewise. + (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>) + (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>) + (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw) + (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw) + (scatter_store<mode><v_int_container>): Likewise. + (mask_scatter_store<mode><v_int_container>): Likewise. + (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked) + (*mask_scatter_store<mode><v_int_container>_sxtw): Likewise. + (*mask_scatter_store<mode><v_int_container>_uxtw): Likewise. 
+ (@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>) + (@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw) + (@aarch64_sve_ld1ro<mode>, @aarch64_adr<mode>): Likewise. + (*aarch64_adr_sxtw, *aarch64_adr_uxtw_unspec): Likewise. + (*aarch64_adr_uxtw_and, @aarch64_adr<mode>_shift): Likewise. + (*aarch64_adr<mode>_shift, *aarch64_adr_shift_sxtw): Likewise. + (*aarch64_adr_shift_uxtw, @aarch64_sve_add_<optab><vsi2qi>): Likewise. + (@aarch64_sve_<sve_fp_op><mode>, fold_left_plus_<mode>): Likewise. + (mask_fold_left_plus_<mode>, @aarch64_sve_compact<mode>): Likewise. + * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>) + (@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode> + <SVE_PARTIAL_I:mode>): Likewise. + (@aarch64_sve2_histcnt<mode>, @aarch64_sve2_histseg<mode>): Likewise. + (@aarch64_pred_<SVE2_MATCH:sve_int_op><mode>): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_cc): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_ptest): Likewise. + * config/aarch64/iterators.md (SVE_FP_UNARY_INT): Make FEXPA + depend on TARGET_NON_STREAMING. + (SVE_BFLOAT_TERNARY_LONG): Likewise BFMMLA. + +gcc/testsuite/ + * g++.target/aarch64/sve/aarch64-ssve.exp: New harness. + * g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Add + -DSTREAMING_COMPATIBLE to the list of options. + * g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + Fix pasto in variable name. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Mark functions + as streaming-compatible if STREAMING_COMPATIBLE is defined. + * gcc.target/aarch64/sve/acle/asm/adda_f16.c: Disable for + streaming-compatible code. + * gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c: Likewise.
_service:tar_scm:0180-testsuite-fix-dg-do-preprocess-typo.patch
Added
@@ -0,0 +1,26 @@
+From 35d804730dcac1b3e96db4b587de0cd77fec3504 Mon Sep 17 00:00:00 2001
+From: Sam James <sam@gentoo.org>
+Date: Tue, 30 Jul 2024 21:46:29 +0100
+Subject: [PATCH 180/188] testsuite: fix 'dg-do-preprocess' typo
+
+We want 'dg-do preprocess', not 'dg-do-preprocess'.  Fix that.
+
+    PR target/106828
+    * g++.target/loongarch/pr106828.C: Fix 'dg-do compile' typo.
+---
+ gcc/testsuite/g++.target/loongarch/pr106828.C | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
+index 190c1db71..0d13cbbd5 100644
+--- a/gcc/testsuite/g++.target/loongarch/pr106828.C
++++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
+@@ -1,4 +1,4 @@
+-/* { dg-do-preprocess } */
++/* { dg-do preprocess } */
+ /* { dg-options "-mabi=lp64d -fsanitize=address" } */
+
+ /* Tests whether the compiler supports compile option '-fsanitize=address'. */
+--
+2.43.0
+
_service:tar_scm:0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
Added
@@ -0,0 +1,250 @@ +From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 12 Dec 2022 15:18:56 +0000 +Subject: PATCH 082/157 BackportSME AArch64: Support new tbranch optab. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11 + +This implements the new tbranch optab for AArch64. + +we cannot emit one big RTL for the final instruction immediately. +The reason that all comparisons in the AArch64 backend expand to separate CC +compares, and separate testing of the operands is for ifcvt. + +The separate CC compare is needed so ifcvt can produce csel, cset etc from the +compares. Unlike say combine, ifcvt can not do recog on a parallel with a +clobber. Should we emit the instruction directly then ifcvt will not be able +to say, make a csel, because we have no patterns which handle zero_extract and +compare. (unlike combine ifcvt cannot transform the extract into an AND). + +While you could provide various patterns for this (and I did try) you end up +with broken patterns because you can't add the clobber to the CC register. If +you do, ifcvt recog fails. + +i.e. + +int +f1 (int x) +{ + if (x & 1) + return 1; + return x; +} + +We lose csel here. + +Secondly the reason the compare with an explicit CC mode is needed is so that +ifcvt can transform the operation into a version that doesn't require the flags +to be set. But it only does so if it know the explicit usage of the CC reg. + +For instance + +int +foo (int a, int b) +{ + return ((a & (1 << 25)) ? 5 : 4); +} + +Doesn't require a comparison, the optimal form is: + +foo(int, int): + ubfx x0, x0, 25, 1 + add w0, w0, 4 + ret + +and no compare is actually needed. If you represent the instruction using an +ANDS instead of a zero_extract then you get close, but you end up with an ands +followed by an add, which is a slower operation. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to... + (*tb<optab><ALLI:mode><GPI:mode>1): ... this. + (tbranch_<code><mode>4): New. + * config/aarch64/iterators.md(ZEROM, zerom): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/tbz_1.c: New test. 
+--- + gcc/config/aarch64/aarch64.md | 33 ++++++-- + gcc/config/aarch64/iterators.md | 2 + + gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++ + 3 files changed, 122 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 079c8a3f9..2becc888e 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -953,12 +953,29 @@ + (const_int 1))) + ) + +-(define_insn "*tb<optab><mode>1" ++(define_expand "tbranch_<code><mode>3" + (set (pc) (if_then_else +- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r") +- (const_int 1) +- (match_operand 1 +- "aarch64_simd_shift_imm_<mode>" "n")) ++ (EQL (match_operand:ALLI 0 "register_operand") ++ (match_operand 1 "aarch64_simd_shift_imm_<mode>")) ++ (label_ref (match_operand 2 "")) ++ (pc))) ++ "" ++{ ++ rtx bitvalue = gen_reg_rtx (<ZEROM>mode); ++ rtx reg = gen_lowpart (<ZEROM>mode, operands0); ++ rtx val = GEN_INT (1UL << UINTVAL (operands1)); ++ emit_insn (gen_and<zerom>3 (bitvalue, reg, val)); ++ operands1 = const0_rtx; ++ operands0 = aarch64_gen_compare_reg (<CODE>, bitvalue, ++ operands1); ++}) ++ ++(define_insn "*tb<optab><ALLI:mode><GPI:mode>1" ++ (set (pc) (if_then_else ++ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r") ++ (const_int 1) ++ (match_operand 1 ++ "aarch64_simd_shift_imm_<ALLI:mode>" "n")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) +@@ -969,15 +986,15 @@ + { + if (get_attr_far_branch (insn) == 1) + return aarch64_gen_far_branch (operands, 2, "Ltb", +- "<inv_tb>\\t%<w>0, %1, "); ++ "<inv_tb>\\t%<ALLI:w>0, %1, "); + else + { + operands1 = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands1)); +- return "tst\t%<w>0, %1\;<bcond>\t%l2"; ++ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2"; + } + } + else +- return "<tbz>\t%<w>0, %1, %l2"; ++ return "<tbz>\t%<ALLI:w>0, %1, %l2"; + } + (set_attr "type" "branch") + (set (attr "length") +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 226dea48a..b616f5c9a 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -1104,6 +1104,8 @@ + + ;; Give the number of bits in the mode + (define_mode_attr sizen (QI "8") (HI "16") (SI "32") (DI "64")) ++(define_mode_attr ZEROM (QI "SI") (HI "SI") (SI "SI") (DI "DI")) ++(define_mode_attr zerom (QI "si") (HI "si") (SI "si") (DI "di")) + + ;; Give the ordinal of the MSB in the mode + (define_mode_attr sizem1 (QI "#7") (HI "#15") (SI "#31") (DI "#63") +diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c +new file mode 100644 +index 000000000..39deb58e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c +@@ -0,0 +1,95 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */ ++/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ ++ ++#include <stdbool.h> ++ ++void h(void); ++ ++/* ++** g1: ++** tbnz w0-9+, #?0, .L(0-9+) ++** ret ++** ... ++*/ ++void g1(bool x) ++{ ++ if (__builtin_expect (x, 0)) ++ h (); ++} ++ ++/* ++** g2: ++** tbz w0-9+, #?0, .L(0-9+) ++** b h ++** ... ++*/ ++void g2(bool x) ++{ ++ if (__builtin_expect (x, 1)) ++ h (); ++} ++ ++/* ++** g3_ge: ++** tbnz w0-9+, #?31, .L0-9+ ++** b h ++** ... ++*/ ++void g3_ge(int x) ++{ ++ if (__builtin_expect (x >= 0, 1)) ++ h (); ++} ++ ++/* ++** g3_gt: ++** cmp w0-9+, 0
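To round out the examples in the commit message above, the case the tbranch optab is designed to catch is a conditional branch on a single bit; a hedged sketch (expected assembly mirrors the new tbz_1.c test, not re-verified):

/* With the tbranch optab wired up, a single-bit test can branch directly
   with tbz/tbnz instead of going through a separate ubfx/tst; testing
   bit 25 should give something like:
       tbnz  w0, 25, .L<n>
       ret                                                               */
void h (void);

void
branch_on_bit25 (unsigned int x)
{
  if (x & (1u << 25))
    h ();
}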
_service:tar_scm:0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch
Added
@@ -0,0 +1,47 @@
+From 643248a4c60c016af44bc740b35c7ac174849029 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Tue, 23 Jul 2024 10:04:26 +0800
+Subject: [PATCH 181/188] LoongArch: Remove gawk extension from a generator
+ script.
+
+gcc/ChangeLog:
+
+    * config/loongarch/genopts/gen-evolution.awk: Do not use
+    "length()" to compute the size of an array.
+---
+ gcc/config/loongarch/genopts/gen-evolution.awk | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk
+index 4d105afa9..1c8004e41 100644
+--- a/gcc/config/loongarch/genopts/gen-evolution.awk
++++ b/gcc/config/loongarch/genopts/gen-evolution.awk
+@@ -1,4 +1,4 @@
+-#!/usr/bin/gawk
++#!/usr/bin/awk -f
+ #
+ # A simple script that generates loongarch-evolution.h
+ # from genopts/isa-evolution.in
+@@ -94,8 +94,9 @@ function gen_cpucfg_useful_idx()
+     idx_bucket[cpucfg_word[i]] = 1
+
+   delete idx_list
++  j = 1
+   for (i in idx_bucket)
+-    idx_list[length(idx_list)-1] = i+0
++    idx_list[j++] = i+0
+   delete idx_bucket
+
+   asort (idx_list)
+@@ -108,7 +109,7 @@ function gen_cpucfg_useful_idx()
+   print ""
+
+   printf ("static constexpr int N_CPUCFG_WORDS = %d;\n",
+-          idx_list[length(idx_list)] + 1)
++          idx_list[j - 1] + 1)
+
+   delete idx_list
+ }
+--
+2.43.0
+
_service:tar_scm:0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
Added
@@ -0,0 +1,381 @@ +From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:04 +0100 +Subject: PATCH 083/157 BackportSME aarch64: Use local frame vars in + shrink-wrapping code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd + +aarch64_layout_frame uses a shorthand for referring to +cfun->machine->frame: + + aarch64_frame &frame = cfun->machine->frame; + +This patch does the same for some other heavy users of the structure. +No functional change intended. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use + a local shorthand for cfun->machine->frame. + (aarch64_restore_callee_saves, aarch64_get_separate_components): + (aarch64_process_components): Likewise. + (aarch64_allocate_and_probe_stack_space): Likewise. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_layout_frame): Use existing shorthand for one more case. +--- + gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- + 1 file changed, 64 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 08a98f8ba..b7da1d0be 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void) + frame.is_scs_enabled + = (!crtl->calls_eh_return + && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) +- && known_ge (cfun->machine->frame.reg_offsetLR_REGNUM, 0)); ++ && known_ge (frame.reg_offsetLR_REGNUM, 0)); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, and we don't need to pop x30 again in the traditional +@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx_insn *insn; + unsigned regno; + unsigned regno2; +@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + + if (skip_wb +- && (regno == cfun->machine->frame.wb_push_candidate1 +- || regno == cfun->machine->frame.wb_push_candidate2)) ++ && (regno == frame.wb_push_candidate1 ++ || regno == frame.wb_push_candidate2)) + continue; + + if (cfun->machine->reg_is_wrapped_separatelyregno) +@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offsetregno; ++ offset = start_offset + frame.reg_offsetregno; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9399,7 +9400,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + { + gcc_assert (known_eq (start_offset, 0)); + poly_int64 fp_offset +- = cfun->machine->frame.below_hard_fp_saved_regs_size; ++ = frame.below_hard_fp_saved_regs_size; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separatelyregno2 + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offsetregno2 +- - cfun->machine->frame.reg_offsetregno)) ++ frame.reg_offsetregno2 - frame.reg_offsetregno)) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -9472,6 +9472,7 @@ static void + 
aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { ++ aarch64_frame &frame = cfun->machine->frame; + unsigned regno; + unsigned regno2; + poly_int64 offset; +@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + rtx reg, mem; + + if (skip_wb +- && (regno == cfun->machine->frame.wb_pop_candidate1 +- || regno == cfun->machine->frame.wb_pop_candidate2)) ++ && (regno == frame.wb_pop_candidate1 ++ || regno == frame.wb_pop_candidate2)) + continue; + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offset[regno]; ++ offset = start_offset + frame.reg_offset[regno]; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separately[regno2] + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offset[regno2] +- - cfun->machine->frame.reg_offset[regno])) ++ frame.reg_offset[regno2] - frame.reg_offset[regno])) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) + static sbitmap + aarch64_get_separate_components (void) + { ++ aarch64_frame &frame = cfun->machine->frame; + sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); + bitmap_clear (components); + +@@ -9627,18 +9628,18 @@ + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + continue; + +- poly_int64 offset = cfun->machine->frame.reg_offset[regno]; ++ poly_int64 offset = frame.reg_offset[regno]; + + /* If the register is saved in the first SVE save slot, we use + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection +- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) ++ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) + && known_eq (offset, 0)) + continue; + + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9657,11 +9658,11 @@ + /* If the spare predicate register used by big-endian SVE code + is call-preserved, it must be saved in the main prologue + before any saves that use it. */ +- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) +- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); ++ if (frame.spare_pred_reg != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.spare_pred_reg); + +- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; ++ unsigned reg1 = frame.wb_push_candidate1; ++ unsigned reg2 = frame.wb_push_candidate2; + /* If registers have been chosen to be stored/restored with + writeback don't interfere with them to avoid having to output explicit + stack adjustment instructions. 
*/ +@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) + static void + aarch64_process_components (sbitmap components, bool prologue_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM); +@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) + machine_mode mode = aarch64_reg_save_mode (regno); + + rtx reg = gen_rtx_REG (mode, regno); +- poly_int64 offset = cfun->machine->frame.reg_offset[regno]; ++ poly_int64 offset = frame.reg_offset[regno]; + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) + break; + } + +- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; ++ poly_int64 offset2 = frame.reg_offset[regno2]; + /* The next register is not of the same class or its offset is not + mergeable with the current one into a pair. */ + if (aarch64_sve_mode_p (mode) + || !satisfies_constraint_Ump (mem)
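The refactoring this patch applies everywhere is the classic "bind a reference once" cleanup. A minimal self-contained C++ sketch of the pattern, using hypothetical stand-in types rather than GCC's real ones:

    #include <cstdio>

    struct frame_info { long reg_offset[32]; };      // stand-in for aarch64_frame
    struct machine_info { frame_info frame; };
    struct function_info { machine_info *machine; };
    static function_info *cfun_like;                 // stand-in for cfun

    static long sum_offsets ()
    {
      frame_info &frame = cfun_like->machine->frame; // bind once, as in the patch
      long s = 0;
      for (int r = 0; r < 32; ++r)
        s += frame.reg_offset[r];   // was cfun_like->machine->frame.reg_offset[r]
      return s;
    }

    int main ()
    {
      machine_info m {};
      function_info f = { &m };
      cfun_like = &f;
      m.frame.reg_offset[3] = 16;
      std::printf ("%ld\n", sum_offsets ());         // prints 16
    }

Because frame is a reference, every access still reads the live structure, which is why the commit can promise no functional change.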
_service:tar_scm:0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch
Added
@@ -0,0 +1,226 @@ +From 64560e75b4d020b6c47e07592595ceed663541af Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 1 Aug 2024 16:07:25 +0800 +Subject: PATCH 182/188 LoongArch: Use iorn and andn standard pattern names. + +R15-1890 introduced new optabs iorc and andc, and its corresponding +internal functions BIT_{ANDC,IORC}, and if targets defines such optabs +for vector modes. And in r15-2258 the iorc and andc were renamed to +iorn and andn. +So we changed the andn and iorn implementation templates to the standard +template names. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xvandn<mode>3): Rename to ... + (andn<mode>3): This. + (xvorn<mode>3): Rename to ... + (iorn<mode>3): This. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vandn_v): Defined as the modified name. + (CODE_FOR_lsx_vorn_v): Likewise. + (CODE_FOR_lasx_xvandn_v): Likewise. + (CODE_FOR_lasx_xvorn_v): Likewise. + (loongarch_expand_builtin_insn): When the builtin function to be + called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the + two operands. + * config/loongarch/loongarch.md (<optab>n<mode>): Rename to ... + (<optab>n<mode>3): This. + * config/loongarch/lsx.md (vandn<mode>3): Rename to ... + (andn<mode>3): This. + (vorn<mode>3): Rename to ... + (iorn<mode>3): This. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-andn-iorn.c: New test. + * gcc.target/loongarch/lsx-andn-iorn.c: New test. +--- + gcc/config/loongarch/lasx.md | 10 +++---- + gcc/config/loongarch/loongarch-builtins.cc | 10 ++++--- + gcc/config/loongarch/loongarch.md | 8 +++--- + gcc/config/loongarch/lsx.md | 10 +++---- + .../gcc.target/loongarch/lasx-andn-iorn.c | 11 ++++++++ + .../gcc.target/loongarch/lsx-andn-iorn.c | 28 +++++++++++++++++++ + 6 files changed, 59 insertions(+), 18 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 44a7d58ff..3775155ca 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -2716,12 +2716,12 @@ + (set_attr "mode" "V4DI")) + + ;; Extend loongson-sx to loongson-asx. 
+-(define_insn "xvandn<mode>3" ++(define_insn "andn<mode>3" + (set (match_operand:LASX 0 "register_operand" "=f") +- (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f")) +- (match_operand:LASX 2 "register_operand" "f"))) ++ (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f")) ++ (match_operand:LASX 1 "register_operand" "f"))) + "ISA_HAS_LASX" +- "xvandn.v\t%u0,%u1,%u2" ++ "xvandn.v\t%u0,%u2,%u1" + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +@@ -4637,7 +4637,7 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + +-(define_insn "xvorn<mode>3" ++(define_insn "iorn<mode>3" + (set (match_operand:ILASX 0 "register_operand" "=f") + (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f")) + (match_operand:ILASX 1 "register_operand" "f"))) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 51abba007..f9ff85d2e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d +-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3 ++#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3 ++#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3 + #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2 + #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2 + #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2 +@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si + #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di + +-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3 +-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3 ++#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3 ++#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3 + #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2 + #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2 + #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2 +@@ -2853,6 +2853,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lsx_vpickod_b: + case CODE_FOR_lsx_vpickod_h: + case CODE_FOR_lsx_vpickod_w: ++ case CODE_FOR_lsx_vandn_v: + case CODE_FOR_lasx_xvilvh_b: + case CODE_FOR_lasx_xvilvh_h: + case CODE_FOR_lasx_xvilvh_w: +@@ -2873,6 +2874,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lasx_xvpickod_b: + case CODE_FOR_lasx_xvpickod_h: + case CODE_FOR_lasx_xvpickod_w: ++ case CODE_FOR_lasx_xvandn_v: + /* Swap the operands 1 and 2 for interleave operations. Built-ins follow + convention of ISA, which have op1 as higher component and op2 as lower + component. 
However, the VEC_PERM op in tree and vec_concat in RTL +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b1c828dba..58c8f28ed 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1701,13 +1701,13 @@ + (set_attr "type" "logical") + (set_attr "mode" "SI")) + +-(define_insn "<optab>n<mode>" ++(define_insn "<optab>n<mode>3" + (set (match_operand:X 0 "register_operand" "=r") + (neg_bitwise:X +- (not:X (match_operand:X 1 "register_operand" "r")) +- (match_operand:X 2 "register_operand" "r"))) ++ (not:X (match_operand:X 2 "register_operand" "r")) ++ (match_operand:X 1 "register_operand" "r"))) + "" +- "<insn>n\t%0,%2,%1" ++ "<insn>n\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 2eac11473..c7480aafd 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -2344,12 +2344,12 @@ + } + (set_attr "mode" "V4SF")) + +-(define_insn "vandn<mode>3" ++(define_insn "andn<mode>3" + (set (match_operand:LSX 0 "register_operand" "=f") +- (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f")) +- (match_operand:LSX 2 "register_operand" "f"))) ++ (and:LSX (not:LSX (match_operand:LSX 2 "register_operand" "f")) ++ (match_operand:LSX 1 "register_operand" "f"))) + "ISA_HAS_LSX" +- "vandn.v\t%w0,%w1,%w2" ++ "vandn.v\t%w0,%w2,%w1" + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +@@ -3028,7 +3028,7 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + +-(define_insn "vorn<mode>3" ++(define_insn "iorn<mode>3" + (set (match_operand:ILSX 0 "register_operand" "=f") + (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f")) + (match_operand:ILSX 1 "register_operand" "f"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +new file mode 100644 +index 000000000..4aa5f19a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +@@ -0,0 +1,11 @@ ++#define N 8 ++ ++#include "./lsx-andn-iorn.c" ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize" } */ ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. */ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +new file mode 100644 +index 000000000..7bceccd37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ftree-vectorize" } */ ++ ++#ifndef N
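For readers unfamiliar with the optab names: andn computes op1 & ~op2 and iorn computes op1 | ~op2, while LoongArch's [x]vandn.v negates its first source (~rj & rk); that mismatch is exactly why the templates above swap operands 1 and 2. A scalar C++ sketch of the equivalence (illustrative only, not the vector code GCC emits):

    #include <cstdint>
    #include <cstdio>

    // Standard optab semantics.
    static uint32_t andn (uint32_t op1, uint32_t op2) { return op1 & ~op2; }
    static uint32_t iorn (uint32_t op1, uint32_t op2) { return op1 | ~op2; }

    // What the vandn.v instruction itself computes: the FIRST source is negated.
    static uint32_t vandn_insn (uint32_t rj, uint32_t rk) { return ~rj & rk; }

    int main ()
    {
      uint32_t a = 0xF0F0F0F0u, b = 0x0FF00FF0u;
      // andn (a, b) == vandn_insn (b, a): the operand swap in the .md templates.
      std::printf ("%08x %08x %08x\n", (unsigned) andn (a, b),
                   (unsigned) vandn_insn (b, a), (unsigned) iorn (a, b));
    }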
_service:tar_scm:0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
Added
@@ -0,0 +1,103 @@ +From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:04 +0100 +Subject: PATCH BackportSME aarch64: Avoid a use of callee_offset + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b + +When we emit the frame chain, i.e. when we reach Here in this statement +of aarch64_expand_prologue: + + if (emit_frame_chain) + { + // Here + ... + } + +the stack is in one of two states: + +- We've allocated up to the frame chain, but no more. + +- We've allocated the whole frame, and the frame chain is within easy + reach of the new SP. + +The offset of the frame chain from the current SP is available +in aarch64_frame as callee_offset. It is also available as the +chain_offset local variable, where the latter is calculated from other +data. (However, chain_offset is not always equal to callee_offset when +!emit_frame_chain, so chain_offset isn't redundant.) + +In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using +chain_offset for the initialisation of the hard frame pointer: + + aarch64_add_offset (Pmode, hard_frame_pointer_rtx, +- stack_pointer_rtx, callee_offset, ++ stack_pointer_rtx, chain_offset, + tmp1_rtx, tmp0_rtx, frame_pointer_needed); + +But the later REG_CFA_ADJUST_CFA handling still used callee_offset. + +I think the difference is harmless, but it's more logical for the +CFA note to be in sync, and it's more convenient for later patches +if it uses chain_offset. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use + chain_offset rather than callee_offset. +--- + gcc/config/aarch64/aarch64.cc | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b7da1d0be..fbd7a079a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno) + current FP is also set up if it is in use. */ + + void + aarch64_expand_prologue (void) + { + aarch64_frame &frame = cfun->machine->frame; + poly_int64 frame_size = frame.frame_size; + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size + = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; + rtx_insn *insn; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) + { +@@ -10376,22 +10375,21 @@ aarch64_expand_prologue (void) + the CFA based on the frame pointer. We therefore need new + DW_CFA_expressions to re-express the save slots with addresses + based on the frame pointer. */ + rtx_insn *insn = get_last_insn (); + gcc_assert (RTX_FRAME_RELATED_P (insn)); + + /* Add an explicit CFA definition if this was previously + implicit. */ + if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) + { +- rtx src = plus_constant (Pmode, stack_pointer_rtx, +- callee_offset); ++ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (hard_frame_pointer_rtx, src)); + } + + /* Change the save slot expressions for the registers that + we've already saved. 
*/ + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], + hard_frame_pointer_rtx, UNITS_PER_WORD); + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], + hard_frame_pointer_rtx, 0); +-- +2.38.1.windows.1 +
_service:tar_scm:0183-LoongArch-Drop-vcond-u-expanders.patch
Added
@@ -0,0 +1,127 @@ +From 8394519779553a2c59214d76054dd1ba87a380b3 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 8 Aug 2024 10:39:54 +0800 +Subject: PATCH 183/188 LoongArch: Drop vcond{,u} expanders. + +Optabs vcond{,u} will be removed for GCC 15. Since regtest shows no +fallout, dropping the expanders, now. + +gcc/ChangeLog: + + PR target/114189 + * config/loongarch/lasx.md (vcondu<LASX:mode><ILASX:mode>): Delete. + (vcond<LASX:mode><LASX_2:mode>): Likewise. + * config/loongarch/lsx.md (vcondu<LSX:mode><ILSX:mode>): Likewise. + (vcond<LSX:mode><LSX_2:mode>): Likewise. +--- + gcc/config/loongarch/lasx.md | 37 ------------------------------------ + gcc/config/loongarch/lsx.md | 31 ------------------------------ + 2 files changed, 68 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3775155ca..be2f6ca8e 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -165,9 +165,6 @@ + ;; All vector modes with 256 bits. + (define_mode_iterator LASX V4DF V8SF V4DI V8SI V16HI V32QI) + +-;; Same as LASX. Used by vcond to iterate two modes. +-(define_mode_iterator LASX_2 V4DF V8SF V4DI V8SI V16HI V32QI) +- + ;; Only used for splitting insert_d and copy_{u,s}.d. + (define_mode_iterator LASX_D V4DI V4DF) + +@@ -762,40 +759,6 @@ + DONE; + }) + +-;; FIXME: 256?? +-(define_expand "vcondu<LASX:mode><ILASX:mode>" +- (match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:ILASX 4 "register_operand") +- (match_operand:ILASX 5 "register_operand")) +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (<LASX:MODE>mode) +- == GET_MODE_NUNITS (<ILASX:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode, +- operands); +- DONE; +-}) +- +-;; FIXME: 256?? +-(define_expand "vcond<LASX:mode><LASX_2:mode>" +- (match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:LASX_2 4 "register_operand") +- (match_operand:LASX_2 5 "register_operand")) +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (<LASX:MODE>mode) +- == GET_MODE_NUNITS (<LASX_2:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode, +- operands); +- DONE; +-}) +- + ;; Same as vcond_ + (define_expand "vcond_mask_<mode><mode256_i>" + (match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c7480aafd..5cb5bc61f 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -186,9 +186,6 @@ + ;; All vector modes with 128 bits. + (define_mode_iterator LSX V2DF V4SF V2DI V4SI V8HI V16QI) + +-;; Same as LSX. Used by vcond to iterate two modes. +-(define_mode_iterator LSX_2 V2DF V4SF V2DI V4SI V8HI V16QI) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D V2DI V2DF) + +@@ -533,34 +530,6 @@ + DONE; + }) + +-(define_expand "vcondu<LSX:mode><ILSX:mode>" +- (match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:ILSX 4 "register_operand") +- (match_operand:ILSX 5 "register_operand")) +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<ILSX:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands); +- DONE; +-}) +- +-(define_expand "vcond<LSX:mode><LSX_2:mode>" +- (match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:LSX_2 4 "register_operand") +- (match_operand:LSX_2 5 "register_operand")) +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<LSX_2:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands); +- DONE; +-}) +- + (define_expand "vcond_mask_<mode><mode_i>" + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "reg_or_m1_operand") +-- +2.43.0 +
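In scalar terms, the deleted vcond{,u} expanders implemented an element-wise select driven by a comparison; GCC now composes the same operation from vec_cmp (produce a mask) and vcond_mask (blend), which is why only the vcond_mask expanders survive. A rough C++ model of the semantics (the real operands are vector modes and op3 is an arbitrary comparison operator, here fixed to '<'):

    #include <cstdio>

    // op0[i] = cmp (op4[i], op5[i]) ? op1[i] : op2[i]
    static void vcond_model (int *op0, const int *op1, const int *op2,
                             const int *op4, const int *op5, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int mask = -(op4[i] < op5[i]);                // vec_cmp: all-ones or zero
          op0[i] = (op1[i] & mask) | (op2[i] & ~mask);  // vcond_mask: blend
        }
    }

    int main ()
    {
      int a[4] = {-3, 2, -8, 5}, z[4] = {0, 0, 0, 0};
      int ones[4] = {-1, -1, -1, -1}, out[4];
      vcond_model (out, ones, z, a, z, 4);
      for (int v : out) std::printf ("%d ", v);         // -1 0 -1 0
    }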
_service:tar_scm:0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
Added
@@ -0,0 +1,51 @@ +From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:05 +0100 +Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames + with no saved registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5 + +If a frame has no saved registers, it can be allocated in one go. +There is no need to treat the areas below and above the saved +registers as separate. + +And if we allocate the frame in one go, it should be allocated +as the initial_adjust rather than the final_adjust. This allows the +frame size to grow to guard_size - guard_used_by_caller before a stack +probe is needed. (A frame with no register saves is necessarily a +leaf frame.) + +This is a no-op as things stand, since a leaf function will have +no outgoing arguments, and so all the frame will be above where +the saved registers normally go. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly + allocate the frame in one go if there are no saved registers. +--- + gcc/config/aarch64/aarch64.cc | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index fbd7a079a..c59af6b1c 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void) + + HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; +- if (frame.frame_size.is_constant (&const_size) +- && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ if (known_eq (frame.saved_regs_size, 0)) ++ frame.initial_adjust = frame.frame_size; ++ else if (frame.frame_size.is_constant (&const_size) ++ && const_size < max_push_offset ++ && known_eq (frame.hard_fp_offset, const_size)) + { + /* Simple, small frame with no outgoing arguments: + +-- +2.33.0 +
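To put numbers on the probing threshold this commit mentions: assuming the usual AArch64 stack-clash defaults of a 64 KiB guard region and 1 KiB reserved for the caller (STACK_CLASH_CALLER_GUARD), a leaf frame with no saved registers can now be allocated as a single initial_adjust of up to 65536 - 1024 = 64512 bytes before the prologue has to emit an explicit probe.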
_service:tar_scm:0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch
Added
@@ -0,0 +1,220 @@ +From d9ce0e85c8cba331413c6a521987a1ecbd94df1c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 8 Aug 2024 09:59:28 +0800 +Subject: [PATCH 184/188] LoongArch: Provide ashr lshr and ashl RTL pattern for + vectors. + +We already support vashr, vlshr and vashl. However, r15-1638 added support +for optimizing x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into +(unsigned) x >> 31. To support this optimization, the vector ashr, lshr and +ashl patterns need to be implemented. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (insn): Added rotatert rotr pairs. + * config/loongarch/simd.md (rotr<mode>3): Remove to ... + (<optab><mode>3): This. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/vect-ashr-lshr.C: New test. +--- + gcc/config/loongarch/loongarch.md | 1 + + gcc/config/loongarch/simd.md | 13 +- + .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++++++++++++++++++ + 3 files changed, 155 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 58c8f28ed..867977b36 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -559,6 +559,7 @@ + (define_code_attr insn [(ashift "sll") + (ashiftrt "sra") + (lshiftrt "srl") ++ (rotatert "rotr") + (ior "or") + (xor "xor") + (and "and") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 00d4c7831..c28b95282 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -306,14 +306,15 @@ + operands[4] = gen_reg_rtx (<MODE>mode); + }); + +-;; <x>vrotri.{b/h/w/d} ++;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d} + +-(define_insn "rotr<mode>3" ++(define_insn "<optab><mode>3" + [(set (match_operand:IVEC 0 "register_operand" "=f") +- (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") +- (match_operand:SI 2 "const_<bitimm>_operand")))] +- "" +- "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2"; ++ (shift_w:IVEC ++ (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:SI 2 "const_<bitimm>_operand")))] ++ "ISA_HAS_LSX" ++ "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")]) + +diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +new file mode 100644 +index 000000000..bcef985fa +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +@@ -0,0 +1,147 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O2" } */ ++/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */ ++ ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef signed char v32qi __attribute__((vector_size(32))); ++typedef short v8hi __attribute__((vector_size(16))); ++typedef short v16hi __attribute__((vector_size(32))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef int v8si __attribute__((vector_size(32))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef long long v4di __attribute__((vector_size(32))); ++ ++v16qi ++foo (v16qi a) ++{ ++ v16qi const1_op = 
__extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo2 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo3 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo4 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo5 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){1,1,1,1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo6 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo7 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){1,1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo8 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){1,1,1,1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16qi ++foo9 (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo10 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo11 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo12 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo13 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){-1,-1,-1,-1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo14 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
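The r15-1638 optimization that motivates this patch rests on a scalar identity, applied lane-wise by the vsrai/vsrli patterns above. A self-checking C++ sketch (assuming arithmetic right shift of negative values, which C++20 guarantees and GCC has always provided):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main ()
    {
      for (int32_t x : { INT32_MIN, -5, 0, 7, INT32_MAX })
        {
          // Arithmetic shift smears the sign bit across the whole word.
          assert ((x < 0 ? -1 : 0) == (x >> 31));                     // vsrai.w per lane
          // Logical shift keeps only the sign bit.
          assert ((x < 0 ? 1 : 0) == (int32_t) ((uint32_t) x >> 31)); // vsrli.w per lane
        }
    }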
_service:tar_scm:0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
Added
@@ -0,0 +1,236 @@ +From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:05 +0100 +Subject: PATCH 086/157 BackportSME aarch64: Add bytes_below_saved_regs + to frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976 + +The frame layout code currently hard-codes the assumption that +the number of bytes below the saved registers is equal to the +size of the outgoing arguments. This patch abstracts that +value into a new field of aarch64_frame. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, + and use it instead of crtl->outgoing_args_size. + (aarch64_get_separate_components): Use bytes_below_saved_regs instead + of outgoing_args_size. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- + gcc/config/aarch64/aarch64.h | 5 +++ + 2 files changed, 41 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index c59af6b1c..5533dd85b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + ++ frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small + offset range. These saves happen below the hard frame pointer. */ +@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void) + + poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; + +- poly_int64 above_outgoing_args ++ poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + + frame.hard_fp_offset +- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; ++ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +- gcc_assert (multiple_p (crtl->outgoing_args_size, ++ gcc_assert (multiple_p (frame.bytes_below_saved_regs, + STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; ++ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + + frame.locals_offset = frame.saved_varargs_size; + +@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; +@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void) + && const_size < max_push_offset + && known_eq (frame.hard_fp_offset, const_size)) + { +- /* Simple, small frame with no outgoing arguments: ++ /* Simple, small frame with no data below the saved registers. + + stp reg1, reg2, sp, -frame_size! 
+ stp reg3, reg4, sp, 16 */ + frame.callee_adjust = const_size; + } +- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) ++ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) + && frame.saved_regs_size.is_constant (&const_saved_regs_size) +- && const_outgoing_args_size + const_saved_regs_size < 512 +- /* We could handle this case even with outgoing args, provided +- that the number of args left us with valid offsets for all +- predicate and vector save slots. It's such a rare case that +- it hardly seems worth the effort though. */ +- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) ++ && const_below_saved_regs + const_saved_regs_size < 512 ++ /* We could handle this case even with data below the saved ++ registers, provided that that data left us with valid offsets ++ for all predicate and vector save slots. It's such a rare ++ case that it hardly seems worth the effort though. */ ++ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca + && frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset)) + { +- /* Frame with small outgoing arguments: ++ /* Frame with small area below the saved registers: + + sub sp, sp, frame_size +- stp reg1, reg2, sp, outgoing_args_size +- stp reg3, reg4, sp, outgoing_args_size + 16 */ ++ stp reg1, reg2, sp, bytes_below_saved_regs ++ stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_outgoing_args_size; ++ frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void) + + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = (frame.hard_fp_offset + + frame.below_hard_fp_saved_regs_size); +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) + { +- /* Frame with large outgoing arguments or SVE saves, but with +- a small local area: ++ /* Frame with large area below the saved registers, or with SVE saves, ++ but with a small area above: + + stp reg1, reg2, sp, -hard_fp_offset! 
+ stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else + { +- /* Frame with large local area and outgoing arguments or SVE saves, +- using frame pointer: ++ /* General case: + + sub sp, sp, hard_fp_offset + stp x29, x30, sp, 0 +@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void) + stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.hard_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + + /* Make sure the individual adjustments add up to the full frame size. */ +@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset2 -= frame.below_hard_fp_saved_regs_size; + else +- offset2 += crtl->outgoing_args_size; ++ offset2 += frame.bytes_below_saved_regs; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + registers. If POLY_SIZE is not large enough to require a probe this function + will only adjust the stack. When allocating the stack space + FRAME_RELATED_P is then used to indicate if the allocation is frame related. +- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing +- arguments. If we are then we ensure that any allocation larger than the ABI
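A simplified sketch of the layout the new field describes, assuming the usual downward-growing AArch64 frame (locals and varargs omitted):

    |  ...                        |  <- incoming SP (top of frame)
    |  saved registers            |
    +-----------------------------+  <- bottom of the saved registers
    |  bytes_below_saved_regs     |     today this region is exactly the
    |  (outgoing argument area)   |     outgoing arguments, hence the old
    +-----------------------------+     use of crtl->outgoing_args_size
       <- outgoing SP (bottom of the static frame)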
_service:tar_scm:0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch
Added
@@ -0,0 +1,203 @@ +From 7e8e122306feaecf8d7b520b4e7c0b9908ca6fd2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 4 Jul 2024 02:49:28 +0800 +Subject: PATCH 185/188 LoongArch: Implement scalar isinf, isnormal, and + isfinite via fclass + +Doing so can avoid loading FP constants from the memory. It also +partially fixes PR 66262 as fclass does not signal on sNaN. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f") + alternative and use movfr2gr.s for it. The spec clearly states + movfr2gr.s sign extends the value to GRLEN. + (fclass_<fmt>): Make the result SImode instead of a floating + mode. The fclass results are really not FP values. + (FCLASS_MASK): New define_int_iterator. + (fclass_optab): New define_int_attr. + (<FCLASS_MASK:fclass_optab><ANYF:mode>): New define_expand + template. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/fclass-compile.c: New test. + * gcc.target/loongarch/fclass-run.c: New test. +--- + gcc/config/loongarch/loongarch.md | 53 ++++++++++++++++--- + .../gcc.target/loongarch/fclass-compile.c | 20 +++++++ + .../gcc.target/loongarch/fclass-run.c | 53 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 867977b36..15960a79f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1851,16 +1851,17 @@ + ;; .................... + + (define_insn "extendsidi2" +- (set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k"))) ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f"))) + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + ldptr.w\t%0,%1 + ld.w\t%0,%1 +- ldx.w\t%0,%1" +- (set_attr "move_type" "sll0,load,load,load") ++ ldx.w\t%0,%1 ++ movfr2gr.s\t%0,%1" ++ (set_attr "move_type" "sll0,load,load,load,mftg") + (set_attr "mode" "DI")) + + (define_insn "extend<SHORT:mode><GPR:mode>2" +@@ -4110,14 +4111,52 @@ + "movgr2fcsr\t$r%0,%1") + + (define_insn "fclass_<fmt>" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") +- UNSPEC_FCLASS)) ++ (set (match_operand:SI 0 "register_operand" "=f") ++ (unspec:SI (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_FCLASS)) + "TARGET_HARD_FLOAT" + "fclass.<fmt>\t%0,%1" + (set_attr "type" "unknown") + (set_attr "mode" "<MODE>")) + ++(define_int_iterator FCLASS_MASK 68 136 952) ++(define_int_attr fclass_optab ++ (68 "isinf") ++ (136 "isnormal") ++ (952 "isfinite")) ++ ++(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++ (match_operand:SI 0 "register_operand" "=r") ++ (match_operand:ANYF 1 "register_operand" " f") ++ (const_int FCLASS_MASK) ++ "TARGET_HARD_FLOAT" ++ { ++ rtx ft0 = gen_reg_rtx (SImode); ++ rtx t0 = gen_reg_rtx (word_mode); ++ rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++ emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); ++ ++ if (TARGET_64BIT) ++ emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0)); ++ else ++ emit_move_insn (t0, ft0); ++ ++ emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask)); ++ emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx)); ++ ++ if (TARGET_64BIT) ++ { ++ t0 = lowpart_subreg (SImode, t0, DImode); ++ SUBREG_PROMOTED_VAR_P (t0) = 1; ++ 
SUBREG_PROMOTED_SET (t0, SRP_SIGNED); ++ } ++ ++ emit_move_insn (operands0, t0); ++ ++ DONE; ++ }) ++ + (define_insn "bytepick_w_<bytepick_imm>" + (set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +new file mode 100644 +index 000000000..9c24d6e26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler-times "fclass\\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "fclass\\.d" 1 } } */ ++ ++__attribute__ ((noipa)) int ++test_fclass_f (float f) ++{ ++ return __builtin_isinf (f) ++ | __builtin_isnormal (f) << 1 ++ | __builtin_isfinite (f) << 2; ++} ++ ++__attribute__ ((noipa)) int ++test_fclass_d (double d) ++{ ++ return __builtin_isinf (d) ++ | __builtin_isnormal (d) << 1 ++ | __builtin_isfinite (d) << 2; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-run.c b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +new file mode 100644 +index 000000000..e5585f9d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +@@ -0,0 +1,53 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fsignaling-nans -D_GNU_SOURCE -std=c23" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include <fenv.h> ++#include "fclass-compile.c" ++ ++#define ASSERT_EQ(x, y) (void)(x == y || (__builtin_abort (), 1)) ++ ++int ++main (void) ++{ ++ volatile float f_inf = __builtin_inff (); ++ volatile float f_zero = 0; ++ volatile float f_normal = 114.514; ++ volatile float f_subnormal = 1e-40; ++ volatile float f_qnan = __builtin_nanf (""); ++ volatile float f_snan = __builtin_nansf (""); ++ volatile double d_inf = __builtin_inf (); ++ volatile double d_zero = 0; ++ volatile double d_normal = 1919.810; ++ volatile double d_subnormal = 1e-320; ++ volatile double d_qnan = __builtin_nan (""); ++ volatile double d_snan = __builtin_nans (""); ++ ++#if __loongarch_frlen >= 64 ++ /* With fclass.{s/d} we shouldn't signal, even if the input is sNaN. ++ PR 66462. */ ++ feenableexcept (FE_INVALID); ++#endif ++ ++ ASSERT_EQ (test_fclass_f (f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (-f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (-f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (f_qnan), 0); ++ ASSERT_EQ (test_fclass_f (f_snan), 0); ++ ++ ASSERT_EQ (test_fclass_d (d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (-d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (-d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (d_qnan), 0); ++ ASSERT_EQ (test_fclass_d (d_snan), 0);
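The masks 68, 136 and 952 decode cleanly if one assumes the fclass result-bit layout from the LoongArch ISA manual (bit 0 sNaN, 1 qNaN, 2 -Inf, 3 -normal, 4 -subnormal, 5 -zero, and bits 6-9 the positive counterparts). A small self-checking C++ sketch of that decomposition:

    #include <cstdio>

    // Assumed fclass.{s,d} result bits, per the LoongArch ISA manual.
    enum : unsigned {
      SNAN = 1u << 0, QNAN = 1u << 1,
      NINF = 1u << 2, NNORM = 1u << 3, NSUB = 1u << 4, NZERO = 1u << 5,
      PINF = 1u << 6, PNORM = 1u << 7, PSUB = 1u << 8, PZERO = 1u << 9,
    };

    static_assert ((NINF | PINF) == 68, "isinf mask");
    static_assert ((NNORM | PNORM) == 136, "isnormal mask");
    static_assert ((NNORM | NSUB | NZERO | PNORM | PSUB | PZERO) == 952,
                   "isfinite mask");

    int main () { std::puts ("fclass masks check out"); }

Note that NaNs set none of the bits any of the three masks test, so the expander's "AND then compare with zero" sequence returns false for both qNaN and sNaN without raising an exception.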
_service:tar_scm:0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
Added
@@ -0,0 +1,87 @@ +From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 087/157 BackportSME aarch64: Add bytes_below_hard_fp to + frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8 + +Following on from the previous bytes_below_saved_regs patch, this one +records the number of bytes that are below the hard frame pointer. +This eventually replaces below_hard_fp_saved_regs_size. + +If a frame pointer is not needed, the epilogue adds final_adjust +to the stack pointer before restoring registers: + + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + +Therefore, if the epilogue needs to restore the stack pointer from +the hard frame pointer, the directly corresponding offset is: + + -bytes_below_hard_fp + final_adjust + +i.e. go from the hard frame pointer to the bottom of the frame, +then add the same amount as if we were using the stack pointer +from the outset. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. + (aarch64_expand_epilogue): Use it instead of + below_hard_fp_saved_regs_size. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 5 +++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5533dd85b..2bb49b9b0 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void) + of the callee save area. */ + bool saves_below_hard_fp_p = maybe_ne (offset, 0); + frame.below_hard_fp_saved_regs_size = offset; ++ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 final_adjust = frame.final_adjust; + poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; + unsigned reg2 = frame.wb_pop_candidate2; + unsigned int last_gpr = (frame.is_scs_enabled +@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + is restored on the instruction doing the writeback. */ + aarch64_add_offset (Pmode, stack_pointer_rtx, + hard_frame_pointer_rtx, +- -callee_offset - below_hard_fp_saved_regs_size, ++ -bytes_below_hard_fp + final_adjust, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 9e0ca380e..dedc5b32f 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the hard frame pointer. This value is ++ always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_hard_fp; ++ + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. 
This value is always a multiple of + STACK_BOUNDARY. */ +-- +2.33.0 +
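A quick sanity check of the new epilogue expression: with 32 bytes of outgoing arguments (so bytes_below_saved_regs = 32), 16 bytes of SVE saves below the hard FP, and final_adjust = 32, we get bytes_below_hard_fp = 16 + 32 = 48 and a restore offset of -48 + 32 = -16 from the hard frame pointer. That equals the old -callee_offset - below_hard_fp_saved_regs_size because, on this code path, callee_offset is 0 and final_adjust equals bytes_below_saved_regs.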
_service:tar_scm:0186-LoongArch-Add-support-to-annotate-tablejump.patch
Added
@@ -0,0 +1,155 @@ +From 5079c41ada379bd8d1bdb92dd2b91e72e9496ea6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 11 Jul 2024 19:43:48 +0800 +Subject: PATCH 186/188 LoongArch: Add support to annotate tablejump + +This is per the request from the kernel developers. For generating the +ORC unwind info, the objtool program needs to analysis the control flow +of a .o file. If a jump table is used, objtool has to correlate the +jump instruction with the table. + +On x86 (where objtool was initially developed) it's simple: a relocation +entry natrually correlates them because one single instruction is used +for table-based jump. But on an RISC machine objtool would have to +reconstruct the data flow if it must find out the correlation on its +own. + +So, emit an additional section to store the correlation info as pairs of +addresses, each pair contains the address of a jump instruction (jr) and +the address of the jump table. This is very trivial to implement in +GCC. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in + (mannotate-tablejump): New option. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.md (tablejump<mode>): Emit + additional correlation info between the jump instruction and the + jump table, if -mannotate-tablejump. + * doc/invoke.texi: Document -mannotate-tablejump. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c: New test. + +Suggested-by: Tiezhu Yang <yangtiezhu@loongson.cn> +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch.md | 12 +++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++++ + gcc/doc/invoke.texi | 13 ++++++++++++- + .../gcc.target/loongarch/jump-table-annotate.c | 15 +++++++++++++++ + 5 files changed, 46 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 0ecd10922..20795f6bd 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -301,3 +301,7 @@ default value is 4. + ; CPUCFG independently, so we use bit flags to specify them. + TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 ++ ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 15960a79f..66236a7c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3496,12 +3496,22 @@ + DONE; + }) + ++(define_mode_attr mode_size (DI "8") (SI "4")) ++ + (define_insn "@tablejump<mode>" + (set (pc) + (match_operand:P 0 "register_operand" "e")) + (use (label_ref (match_operand 1 "" ""))) + "" +- "jr\t%0" ++ { ++ return TARGET_ANNOTATE_TABLEJUMP ++ ? "1:jr\t%0\n\t" ++ ".pushsection\t.discard.tablejump_annotate\n\t" ++ "\t.<mode_size>byte\t1b\n\t" ++ "\t.<mode_size>byte\t%1\n\t" ++ ".popsection" ++ : "jr\t%0"; ++ } + (set_attr "type" "jump") + (set_attr "mode" "none")) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 69b3b965c..16fed6ec3 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -310,6 +310,10 @@ default value is 4. 
+ TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 + ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f6d59317b..d2c52cdf4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1011,7 +1011,7 @@ Objective-C and Objective-C++ Dialects}. + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +--mtls-dialect=@var{opt}} ++-mtls-dialect=@var{opt} -mannotate-tablejump -mno-annotate-tablejump} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24750,6 +24750,17 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + This option controls which tls dialect may be used for general dynamic and + local dynamic TLS models. + ++@opindex mannotate-tablejump ++@opindex mno-annotate-tablejump ++@item -mannotate-tablejump ++@itemx -mno-annotate-tablejump ++Create an annotation section @code{.discard.tablejump_annotate} to ++correlate the @code{jirl} instruction and the jump table when a jump ++table is used to optimize the @code{switch} statement. Some external ++tools, for example @file{objtool} of the Linux kernel building system, ++need the annotation to analysis the control flow. The default is ++@option{-mno-annotate-tablejump}. ++ + @table @samp + @item trad + Use traditional TLS. This is the default. +diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +new file mode 100644 +index 000000000..9d58e60e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mannotate-tablejump" } */ ++ ++extern void asdf(int); ++void foo(int x) { ++ switch (x) { ++ case 0: asdf(10); break; ++ case 1: asdf(11); break; ++ case 2: asdf(12); break; ++ case 3: asdf(13); break; ++ case 4: asdf(14); break; ++ } ++} ++ ++/* { dg-final { scan-assembler "\\.discard\\.tablejump_annotate" } } */ +-- +2.43.0 +
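Concretely, each record in .discard.tablejump_annotate is a pair of GRLEN-sized addresses: the jr instruction first, then its jump table. A hypothetical consumer-side sketch in C++ (the struct name and the extraction of the section are assumptions, not part of the patch):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // One record on a 64-bit LoongArch object: ".8byte 1b" then ".8byte %1".
    struct tablejump_annotation
    {
      uint64_t jump_insn;   // address of the jr instruction (the "1:" label)
      uint64_t jump_table;  // address of the jump table it indexes
    };

    // How an objtool-like tool could walk the records once the section is read.
    static void dump (const tablejump_annotation *recs, std::size_t n)
    {
      for (std::size_t i = 0; i < n; ++i)
        std::printf ("jr @ %#llx -> table @ %#llx\n",
                     (unsigned long long) recs[i].jump_insn,
                     (unsigned long long) recs[i].jump_table);
    }

    int main ()
    {
      tablejump_annotation demo[] = { { 0x120000a30u, 0x120004f00u } }; // made-up addresses
      dump (demo, 1);
    }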
_service:tar_scm:0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
Added
@@ -0,0 +1,126 @@ +From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 20 Jun 2023 21:48:38 +0100 +Subject: PATCH 088/157 BackportSME aarch64: Robustify stack tie handling + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb + +The SVE handling of stack clash protection copied the stack +pointer to X11 before the probe and set up X11 as the CFA +for unwind purposes: + + /* This is done to provide unwinding information for the stack + adjustments we're about to do, however to prevent the optimizers + from removing the R11 move and leaving the CFA note (which would be + very wrong) we tie the old and new stack pointer together. + The tie will expand to nothing but the optimizers will not touch + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); + emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. */ + add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); + RTX_FRAME_RELATED_P (insn) = 1; + +-fcprop-registers is now smart enough to realise that X11 = SP, +replace X11 with SP in the stack tie, and delete the instruction +created above. + +This patch tries to prevent that by making stack_tie fussy about +the register numbers. It fixes failures in +gcc.target/aarch64/sve/pcs/stack_clash*.c. + +gcc/ + * config/aarch64/aarch64.md (stack_tie): Hard-code the first + register operand to the stack pointer. Require the second register + operand to have the number specified in a separate const_int operand. + * config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function. + (aarch64_allocate_and_probe_stack_space): Use it. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++---- + gcc/config/aarch64/aarch64.md | 7 ++++--- + 2 files changed, 18 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 2bb49b9b0..4d505c6fc 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + return STACK_CLASH_CALLER_GUARD; + } + ++/* Emit a stack tie that acts as a scheduling barrier for all previous and ++ subsequent memory accesses and that requires the stack pointer and REG ++ to have their current values. REG can be stack_pointer_rtx if no ++ other register's value needs to be fixed. */ ++ ++static void ++aarch64_emit_stack_tie (rtx reg) ++{ ++ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode))); ++} + + /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch + registers. If POLY_SIZE is not large enough to require a probe this function +@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); +- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); ++ aarch64_emit_stack_tie (stack_ptr_copy); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. 
*/
+@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void)
+ 	  aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
+ 				      hard_frame_pointer_rtx, 0);
+ 	}
+-      emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
++      aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+     }
+ 
+   aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
+@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+       || cfun->calls_alloca
+       || crtl->calls_eh_return)
+     {
+-      emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++      aarch64_emit_stack_tie (stack_pointer_rtx);
+       need_barrier_p = false;
+     }
+ 
+@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ 				callee_adjust != 0, &cfi_ops);
+ 
+   if (need_barrier_p)
+-    emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++    aarch64_emit_stack_tie (stack_pointer_rtx);
+ 
+   if (callee_adjust != 0)
+     aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 2becc888e..2ce123255 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -7088,10 +7088,11 @@
+ 
+ (define_insn "stack_tie"
+   [(set (mem:BLK (scratch))
+-	(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
+-		     (match_operand:DI 1 "register_operand" "rk")]
++	(unspec:BLK [(reg:DI SP_REGNUM)
++		     (match_operand:DI 0 "register_operand" "rk")
++		     (match_operand:DI 1 "const_int_operand")]
+ 		    UNSPEC_PRLG_STK))]
+-  ""
++  "REGNO (operands[0]) == INTVAL (operands[1])"
+   ""
+   [(set_attr "length" "0")]
+ )
+--
+2.33.0
+
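For context, a hedged C sketch (not from the patch) of one situation the tie protects: alloca forces the epilogue to restore SP from the frame pointer, and the stack_tie barrier keeps later memory accesses from being scheduled across that restore.

#include <alloca.h>
#include <string.h>

/* Sketch only: alloca forces a frame pointer, so the epilogue restores SP
   from it and emits a stack tie (now via aarch64_emit_stack_tie) so that
   loads and stores cannot move above the restore.  */
int
sum_n (const int *src, int n)
{
  int *buf = alloca (n * sizeof *buf);
  memcpy (buf, src, n * sizeof *buf);
  int s = 0;
  for (int i = 0; i < n; i++)
    s += buf[i];
  return s;
}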
_service:tar_scm:0187-LoongArch-Fix-up-r15-4130.patch
Added
@@ -0,0 +1,32 @@
+From 8cdf96cd61612746262a811b8a091ecab27bd3a1 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 10 Jul 2024 12:15:23 +0800
+Subject: [PATCH 187/188] LoongArch: Fix up r15-4130
+
+An earlier version of the patch (lacking the regeneration of some files)
+was pushed. Fix it up now.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch.opt.urls: Regenerate.
+---
+ gcc/config/loongarch/loongarch.opt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 16fed6ec3..f9c7bd446 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -312,7 +312,7 @@ HOST_WIDE_INT la_isa_evolution = 0
+ 
+ mannotate-tablejump
+ Target Mask(ANNOTATE_TABLEJUMP) Save
+-Annotate table jump instruction (jr {reg}) to correlate it with the jump table
++Annotate table jump instruction (jr {reg}) to correlate it with the jump table.
+ 
+ mfrecipe
+ Target Mask(ISA_FRECIPE) Var(la_isa_evolution)
+--
+2.43.0
+
_service:tar_scm:0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
Added
@@ -0,0 +1,228 @@ +From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 089/157 BackportSME aarch64: Tweak + aarch64_save/restore_callee_saves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db + +aarch64_save_callee_saves and aarch64_restore_callee_saves took +a parameter called start_offset that gives the offset of the +bottom of the saved register area from the current stack pointer. +However, it's more convenient for later patches if we use the +bottom of the entire frame as the reference point, rather than +the bottom of the saved registers. + +Doing that removes the need for the callee_offset field. +Other than that, this is not a win on its own. It only really +makes sense in combination with the follow-on patches. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove + callee_offset handling. + (aarch64_save_callee_saves): Replace the start_offset parameter + with a bytes_below_sp parameter. + (aarch64_restore_callee_saves): Likewise. + (aarch64_expand_prologue): Update accordingly. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ + gcc/config/aarch64/aarch64.h | 4 --- + 2 files changed, 28 insertions(+), 32 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4d505c6fc..a0a4c7ac3 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void) + frame.final_adjust = 0; + frame.callee_adjust = 0; + frame.sve_callee_adjust = 0; +- frame.callee_offset = 0; + + frame.wb_pop_candidate1 = frame.wb_push_candidate1; + frame.wb_pop_candidate2 = frame.wb_push_candidate2; +@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void) + stp reg1, reg2, sp, bytes_below_saved_regs + stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + } + + /* Emit code to save the callee-saved registers from register number START +- to LIMIT to the stack at the location starting at offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P +- is true if the hard frame pointer has been set up. */ ++ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard ++ frame pointer has been set up. 
*/ + + static void +-aarch64_save_callee_saves (poly_int64 start_offset, ++aarch64_save_callee_saves (poly_int64 bytes_below_sp, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { +@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + else if (GP_REGNUM_P (regno) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { +- gcc_assert (known_eq (start_offset, 0)); +- poly_int64 fp_offset +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, + } + + /* Emit code to restore the callee registers from register number START +- up to and including LIMIT. Restore from the stack offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. Write the +- appropriate REG_CFA_RESTORE notes into CFI_OPS. */ ++ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE ++ notes into CFI_OPS. */ + + static void +-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, ++aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { + aarch64_frame &frame = cfun->machine->frame; +@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void) + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; +@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void) + - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + +- /* The offset of the bottom of the save area from the current SP. */ +- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; ++ /* The offset of the current SP from the bottom of the static frame. 
*/ ++ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { +@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void) + { + reg1 = R29_REGNUM; + reg2 = R30_REGNUM; +- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, ++ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, + false, false); + } + else +@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void) + aarch64_emit_stack_tie (hard_frame_pointer_rtx); + } + +- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + if (maybe_ne (sve_callee_adjust, 0)) +@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void) + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, + !frame_pointer_needed, false); +- saved_regs_offset += sve_callee_adjust; ++ bytes_below_sp -= sve_callee_adjust; + } +- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, + false, emit_frame_chain); +- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ ++ gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); + } +@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; +@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ +- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, + callee_adjust != 0, &cfi_ops); +- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, + false, &cfi_ops);
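The new bookkeeping can be sanity-checked with invented numbers; a standalone sketch (not GCC code) showing that the old start_offset form and the new bytes_below_sp form compute the same SP-relative slot:

#include <stdio.h>
#include <assert.h>

/* Invented values purely to show the two conventions agree.  */
int
main (void)
{
  long reg_offset = 16;               /* slot within the save area */
  long bytes_below_saved_regs = 64;   /* outgoing args below the saves */
  long bytes_below_sp = 64;           /* SP sits this far above frame bottom */
  long start_offset = bytes_below_saved_regs - bytes_below_sp;

  long old_offset = start_offset + reg_offset;
  long new_offset = reg_offset + bytes_below_saved_regs - bytes_below_sp;
  assert (old_offset == new_offset);
  printf ("str reg, [sp, #%ld]\n", new_offset);
  return 0;
}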
_service:tar_scm:0188-libphobos-Update-build-scripts-for-LoongArch64.patch
Added
@@ -0,0 +1,304 @@ +From 46e279e1c79086e930965c9a15d08b70a2c06a80 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 28 Oct 2024 01:53:57 +0000 +Subject: PATCH 188/188 libphobos: Update build scripts for LoongArch64. + +libphobos/ChangeLog: + + * m4/druntime/cpu.m4: Support loongarch* targets. + * libdruntime/Makefile.am: Same. + * libdruntime/Makefile.in: Regenerate. + * configure: Regenerate. +--- + libphobos/configure | 21 ++++++- + libphobos/libdruntime/Makefile.am | 3 + + libphobos/libdruntime/Makefile.in | 94 +++++++++++++++++++------------ + libphobos/m4/druntime/cpu.m4 | 5 ++ + 4 files changed, 85 insertions(+), 38 deletions(-) + +diff --git a/libphobos/configure b/libphobos/configure +index 9da06f087..6acb2dd89 100755 +--- a/libphobos/configure ++++ b/libphobos/configure +@@ -696,6 +696,8 @@ DRUNTIME_CPU_POWERPC_FALSE + DRUNTIME_CPU_POWERPC_TRUE + DRUNTIME_CPU_MIPS_FALSE + DRUNTIME_CPU_MIPS_TRUE ++DRUNTIME_CPU_LOONGARCH_FALSE ++DRUNTIME_CPU_LOONGARCH_TRUE + DRUNTIME_CPU_ARM_FALSE + DRUNTIME_CPU_ARM_TRUE + DRUNTIME_CPU_AARCH64_FALSE +@@ -11750,7 +11752,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11753 "configure" ++#line 11755 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -11856,7 +11858,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11859 "configure" ++#line 11861 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -14137,6 +14139,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; + mips*) druntime_target_cpu_parsed="mips" + ;; ++ loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + powerpc*) + druntime_target_cpu_parsed="powerpc" + ;; +@@ -14174,6 +14179,14 @@ else + DRUNTIME_CPU_MIPS_FALSE= + fi + ++ if test "$druntime_target_cpu_parsed" = "loongarch"; then ++ DRUNTIME_CPU_LOONGARCH_TRUE= ++ DRUNTIME_CPU_LOONGARCH_FALSE='#' ++else ++ DRUNTIME_CPU_LOONGARCH_TRUE='#' ++ DRUNTIME_CPU_LOONGARCH_FALSE= ++fi ++ + if test "$druntime_target_cpu_parsed" = "powerpc"; then + DRUNTIME_CPU_POWERPC_TRUE= + DRUNTIME_CPU_POWERPC_FALSE='#' +@@ -15738,6 +15751,10 @@ if test -z "${DRUNTIME_CPU_MIPS_TRUE}" && test -z "${DRUNTIME_CPU_MIPS_FALSE}"; + as_fn_error $? "conditional \"DRUNTIME_CPU_MIPS\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${DRUNTIME_CPU_LOONGARCH_TRUE}" && test -z "${DRUNTIME_CPU_LOONGARCH_FALSE}"; then ++ as_fn_error $? "conditional \"DRUNTIME_CPU_LOONGARCH\" was never defined. ++Usually this means the macro was only invoked conditionally." "$LINENO" 5 ++fi + if test -z "${DRUNTIME_CPU_POWERPC_TRUE}" && test -z "${DRUNTIME_CPU_POWERPC_FALSE}"; then + as_fn_error $? "conditional \"DRUNTIME_CPU_POWERPC\" was never defined. + Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am +index 6ca4012b7..65e3f1b44 100644 +--- a/libphobos/libdruntime/Makefile.am ++++ b/libphobos/libdruntime/Makefile.am +@@ -86,6 +86,9 @@ endif + if DRUNTIME_CPU_MIPS + DRUNTIME_SOURCES_CONFIGURED += config/mips/switchcontext.S + endif ++if DRUNTIME_CPU_LOONGARCH ++ DRUNTIME_SOURCES_CONFIGURED += config/loongarch/switchcontext.S ++endif + if DRUNTIME_CPU_POWERPC + DRUNTIME_SOURCES_CONFIGURED += config/powerpc/switchcontext.S + endif +diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in +index f7f78d71f..91cd65362 100644 +--- a/libphobos/libdruntime/Makefile.in ++++ b/libphobos/libdruntime/Makefile.in +@@ -124,12 +124,13 @@ target_triplet = @target@ + # CPU specific sources + @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S + @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S +-@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S +-@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S +-@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S ++@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S ++@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S ++@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S + subdir = libdruntime + ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -474,45 +475,49 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \ + @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22) + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo +-am__objects_32 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo 
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo ++am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_21) \ + $(am__objects_23) $(am__objects_24) $(am__objects_25) \ + $(am__objects_26) $(am__objects_27) $(am__objects_28) \ +- $(am__objects_29) $(am__objects_30) $(am__objects_31) +-am__objects_33 = gcc/config.lo gcc/libbacktrace.lo +-am__objects_34 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ +- $(am__objects_32) $(am__objects_33) +-am_libgdruntime_la_OBJECTS = $(am__objects_34) ++ $(am__objects_29) $(am__objects_30) $(am__objects_31) \ ++ $(am__objects_32) ++am__objects_34 = gcc/config.lo gcc/libbacktrace.lo ++am__objects_35 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ ++ $(am__objects_33) $(am__objects_34) ++am_libgdruntime_la_OBJECTS = $(am__objects_35) + libgdruntime_la_OBJECTS = $(am_libgdruntime_la_OBJECTS) + am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +-am__objects_35 = core/stdc/libgdruntime_convenience_la-errno_.lo +-@DRUNTIME_OS_MINGW_TRUE@am__objects_36 = $(am__objects_20) \ ++am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo ++@DRUNTIME_OS_MINGW_TRUE@am__objects_37 = $(am__objects_20) \ + @DRUNTIME_OS_MINGW_TRUE@ config/mingw/libgdruntime_convenience_la-msvc.lo +-@DRUNTIME_CPU_AARCH64_TRUE@am__objects_37 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_ARM_TRUE@am__objects_38 = config/arm/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_39 = config/mips/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_40 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_41 = config/mingw/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_42 = config/x86/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_43 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_44 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo +-am__objects_45 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++am__objects_47 = 
$(am__objects_5) $(am__objects_7) $(am__objects_9) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_36) \ + $(am__objects_23) $(am__objects_37) $(am__objects_38) \ + $(am__objects_39) $(am__objects_40) $(am__objects_41) \ +- $(am__objects_42) $(am__objects_43) $(am__objects_44) +-am__objects_46 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ +- $(am__objects_45) $(am__objects_33) +-am__objects_47 = $(am__objects_46) +-am_libgdruntime_convenience_la_OBJECTS = $(am__objects_47) ++ $(am__objects_42) $(am__objects_43) $(am__objects_44) \ ++ $(am__objects_45) $(am__objects_46) ++am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ ++ $(am__objects_47) $(am__objects_33) ++am__objects_49 = $(am__objects_48) ++am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49) + libgdruntime_convenience_la_OBJECTS = \
_service:tar_scm:0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
Added
@@ -0,0 +1,47 @@
+From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:07 +0100
+Subject: [PATCH 090/157] [Backport][SME] aarch64: Only calculate chain_offset
+ if there is a chain
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645
+
+After previous patches, it is no longer necessary to calculate
+a chain_offset in cases where there is no chain record.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the
+	calculation of chain_offset into the emit_frame_chain block.
+---
+ gcc/config/aarch64/aarch64.cc | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a0a4c7ac3..bef6a658b 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void)
+   if (callee_adjust != 0)
+     aarch64_push_regs (reg1, reg2, callee_adjust);
+ 
+-  /* The offset of the frame chain record (if any) from the current SP.  */
+-  poly_int64 chain_offset = (initial_adjust + callee_adjust
+-			     - frame.hard_fp_offset);
+-  gcc_assert (known_ge (chain_offset, 0));
+-
+   /* The offset of the current SP from the bottom of the static frame.  */
+   poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ 
+   if (emit_frame_chain)
+     {
++      /* The offset of the frame chain record (if any) from the current SP.  */
++      poly_int64 chain_offset = (initial_adjust + callee_adjust
++				 - frame.hard_fp_offset);
++      gcc_assert (known_ge (chain_offset, 0));
++
+       if (callee_adjust == 0)
+	 {
+	   reg1 = R29_REGNUM;
+--
+2.33.0
+
_service:tar_scm:0189-LoongArch-fix-building-errors.patch
Added
@@ -0,0 +1,273 @@ +From 142ae446cab26f1beb81a53a7da3c477ce42df40 Mon Sep 17 00:00:00 2001 +From: Peng Fan <fanpeng@loongson.cn> +Date: Mon, 28 Oct 2024 09:02:51 +0000 +Subject: PATCH LoongArch: fix building errors. + +--- + config/mt-loongarch-mlib | 2 +- + gcc/config/loongarch/loongarch-evolution.h | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 1 + + gcc/config/loongarch/loongarch-str.h | 11 +++--- + gcc/config/loongarch/loongarch.cc | 9 +---- + gcc/config/loongarch/loongarch.md | 44 ++++++++++++++++------ + gcc/config/loongarch/simd.md | 15 +++++--- + gcc/doc/invoke.texi | 3 +- + 8 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib +index 4cfe568f1..bbbba277f 100644 +--- a/config/mt-loongarch-mlib ++++ b/config/mt-loongarch-mlib +@@ -1 +1 @@ +-FLAGS_FOR_TARGET += -fmultiflags ++FLAGS_FOR_TARGET += +diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h +index d64996481..7e8e602c7 100644 +--- a/gcc/config/loongarch/loongarch-evolution.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -1,7 +1,7 @@ + /* Generated automatically by "genstr" from "isa-evolution.in". + Please do not edit this file directly. + +- Copyright (C) 2023 Free Software Foundation, Inc. ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. + + This file is part of GCC. + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 735daeb7c..1d08bb6a1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -1071,6 +1071,7 @@ loongarch_init_misc_options (struct gcc_options *opts, + + #undef INIT_TARGET_FLAG + ++#define TARGET_DIRECT_EXTERN_ACCESS_OPTS_P(opts) (((opts->x_target_flags) & MASK_DIRECT_EXTERN_ACCESS) != 0) + /* Set mexplicit-relocs default. */ + if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) + opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 3cbe12f7b..13d161a8c 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -66,9 +66,10 @@ along with GCC; see the file COPYING3. If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_FRECIPE "frecipe" +-#define OPTSTR_DIV32 "div32" +-#define OPTSTR_LAM_BH "lam-bh" +-#define OPTSTR_LAMCAS "lamcas" +-#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++#define OPTSTR_FRECIPE "frecipe" ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 53bd8d7ec..6be0d80b3 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -764,14 +764,7 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- +- /* For a C23 variadic function w/o any named argument, and w/o an +- artifical argument for large return value, skip advancing args. +- There is such an artifical argument iff. arg.type is non-NULL +- (PR 114175). 
*/ +- if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) +- || arg.type != NULL_TREE) +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 66236a7c7..d8d444c7a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -32,6 +32,7 @@ + UNSPEC_FCLASS + UNSPEC_FMAX + UNSPEC_FMIN ++ UNSPEC_COPYSIGN + UNSPEC_FTINT + UNSPEC_FTINTRM + UNSPEC_FTINTRP +@@ -415,11 +416,13 @@ + + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. +-(define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++;; (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++(define_mode_iterator LD_AT_LEAST_32_BIT (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything able to be stored with the same addressing mode as + ;; st.w. +-(define_mode_iterator ST_ANY QHWD ANYF) ++;; (define_mode_iterator ST_ANY QHWD ANYF) ++(define_mode_iterator ST_ANY (QI "") (HI "") (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV (DI "TARGET_64BIT") +@@ -590,6 +593,10 @@ + (define_code_attr sel (eq "masknez") (ne "maskeqz")) + (define_code_attr selinv (eq "maskeqz") (ne "masknez")) + ++(define_int_attr lrint_allow_inexact (UNSPEC_FTINT "1") ++ (UNSPEC_FTINTRM "0") ++ (UNSPEC_FTINTRP "0")) ++ + ;; Iterator and attributes for floating-point to fixed-point conversion + ;; instructions. + (define_int_iterator LRINT UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP) +@@ -625,7 +632,8 @@ + ;; so the redundant sign extension can be removed if the output is used as + ;; an input of a bitwise operation. Note plus, rotl, and div are handled + ;; separately. 
+-(define_code_iterator shift_w any_shift rotatert) ++;; (define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator shift_w ashift ashiftrt lshiftrt rotatert) + (define_code_iterator arith_w minus mult) + + (define_expand "<optab><mode>3" +@@ -1324,8 +1332,9 @@ + + (define_insn "copysign<mode>3" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f"))) ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ UNSPEC_COPYSIGN)) + "TARGET_HARD_FLOAT" + "fcopysign.<fmt>\t%0,%1,%2" + (set_attr "type" "fcopysign") +@@ -2722,12 +2731,13 @@ + (set_attr "mode" "<MODE>")) + + ;; Convert floating-point numbers to integers ++;; (<LRINT> == UNSPEC_FTINT + (define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2" + (set (match_operand:ANYFI 0 "register_operand" "=f") + (unspec:ANYFI (match_operand:ANYF 1 "register_operand" "f") + LRINT)) + "TARGET_HARD_FLOAT && +- (<LRINT> == UNSPEC_FTINT ++ (<lrint_allow_inexact> + || flag_fp_int_builtin_inexact + || !flag_trapping_math)" + "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1" +@@ -4135,15 +4145,26 @@ + (136 "isnormal") + (952 "isfinite")) + +-(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;;(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;; (match_operand:SI 0 "register_operand" "=r") ++;; (match_operand:ANYF 1 "register_operand" " f") ++;; (const_int FCLASS_MASK) ++;; "TARGET_HARD_FLOAT" ++;; { ++;; rtx ft0 = gen_reg_rtx (SImode); ++;; rtx t0 = gen_reg_rtx (word_mode); ++;; rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++(define_expand "fclass_optab<ANYF:mode>2" ++ (unspec:ANYF + (match_operand:SI 0 "register_operand" "=r") +- (match_operand:ANYF 1 "register_operand" " f") +- (const_int FCLASS_MASK) ++ (match_operand:ANYF 1 "register_operand" " f") ++ UNSPEC_FCLASS) + "TARGET_HARD_FLOAT" + { + rtx ft0 = gen_reg_rtx (SImode); + rtx t0 = gen_reg_rtx (word_mode); +- rtx mask = GEN_INT (<FCLASS_MASK>); ++ rtx mask = GEN_INT (GET_MODE_MASK (<MODE>mode)); + + emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); + +@@ -4165,7 +4186,8 @@ + emit_move_insn (operands0, t0); + + DONE;
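For reference, a minimal use of the builtin served by the rewritten copysign<mode>3 pattern on hard-float LoongArch (illustrative only, not part of the patch):

#include <stdio.h>

int
main (void)
{
  double magnitude = 3.5, sign_source = -0.0;
  /* Expands through the copysign<mode>3 pattern (now an UNSPEC_COPYSIGN
     unspec) on hard-float targets; prints -3.500000.  */
  printf ("%f\n", __builtin_copysign (magnitude, sign_source));
  return 0;
}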
_service:tar_scm:0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
Added
@@ -0,0 +1,94 @@ +From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:07 +0100 +Subject: PATCH 091/157 BackportSME aarch64: Rename locals_offset to + bytes_above_locals +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854 + +locals_offset was described as: + + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of + STACK_BOUNDARY. */ + +This is implicitly an “upside down” view of the frame: the incoming +SP is at offset 0, and anything N bytes below the incoming SP is at +offset N (rather than -N). + +However, reg_offset instead uses a “right way up” view; that is, +it views offsets in address terms. Something above X is at a +positive offset from X and something below X is at a negative +offset from X. + +Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, +target-independent code views offsets in address terms too: +locals are allocated at negative offsets to virtual_stack_vars. + +It seems confusing to have *_offset fields of the same structure +using different polarities like this. This patch tries to avoid +that by renaming locals_offset to bytes_above_locals. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... + (aarch64_frame::bytes_above_locals): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_initial_elimination_offset): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index bef6a658b..992f71bbd 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void) + STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + +- frame.locals_offset = frame.saved_varargs_size; ++ frame.bytes_above_locals = frame.saved_varargs_size; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.locals_offset; ++ return frame.hard_fp_offset - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return frame.frame_size - frame.locals_offset; ++ return frame.frame_size - frame.bytes_above_locals; + } + + return frame.frame_size; +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index a1db4f689..2acff9a96 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame + always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_hard_fp; + +- /* Offset from the base of the frame (incomming SP) to the +- top of the locals area. This value is always a multiple of ++ /* The number of bytes between the top of the locals area and the top ++ of the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 locals_offset; ++ poly_int64 bytes_above_locals; + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. 
This value is always a multiple of +-- +2.33.0 +
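A worked example of the polarity change, with invented numbers: viewing the incoming SP as address 0, locals that start 32 bytes below it sit at address -32, while the field itself stores +32; the new name makes that reading explicit.

#include <stdio.h>

int
main (void)
{
  long incoming_sp = 0;           /* base of the frame, in address terms */
  long bytes_above_locals = 32;   /* distance from locals up to incoming SP */
  long locals_address = incoming_sp - bytes_above_locals;
  printf ("locals start at SP%+ld\n", locals_address);  /* prints SP-32 */
  return 0;
}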
_service:tar_scm:0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch
Added
@@ -0,0 +1,119 @@
+From 13dfb01e5c30c3bd09333ac79d6ff96a617fea67 Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Thu, 3 Aug 2023 13:11:12 +0200
+Subject: [PATCH] tree-optimization/110702 - avoid zero-based memory references
+ in IVOPTs
+
+Sometimes IVOPTs chooses a weird induction variable which downstream
+leads to issues. Most of the times we can fend those off during costing
+by rejecting the candidate but it looks like the address description
+costing synthesizes is different from what we end up generating so
+the following fixes things up at code generation time. Specifically
+we avoid the create_mem_ref_raw fallback which uses a literal zero
+address base with the actual base in index2. For the case in question
+we have the address
+
+  type = unsigned long
+  offset = 0
+  elements = {
+    [0] = &e * -3,
+    [1] = (sizetype) a.9_30 * 232,
+    [2] = ivtmp.28_44 * 4
+  }
+
+from which we code generate the problematical
+
+  _3 = MEM[(long int *)0B + ivtmp.36_9 + ivtmp.28_44 * 4];
+
+which references the object at address zero. The patch below
+recognizes the fallback after the fact and transforms the
+TARGET_MEM_REF memory reference into a LEA for which this form
+isn't problematic:
+
+  _24 = &MEM[(long int *)0B + ivtmp.36_34 + ivtmp.28_44 * 4];
+  _3 = *_24;
+
+hereby avoiding the correctness issue. We'd later conclude the
+program terminates at the null pointer dereference and make the
+function pure, miscompling the main function of the testcase.
+
+	PR tree-optimization/110702
+	* tree-ssa-loop-ivopts.cc (rewrite_use_address): When
+	we created a NULL pointer based access rewrite that to
+	a LEA.
+
+	* gcc.dg/torture/pr110702.c: New testcase.
+---
+ gcc/testsuite/gcc.dg/torture/pr110702.c | 31 +++++++++++++++++++++++++
+ gcc/tree-ssa-loop-ivopts.cc             | 17 +++++++++++++-
+ 2 files changed, 47 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/torture/pr110702.c
+
+diff --git a/gcc/testsuite/gcc.dg/torture/pr110702.c b/gcc/testsuite/gcc.dg/torture/pr110702.c
+new file mode 100644
+index 00000000000..aab9c7d923e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr110702.c
+@@ -0,0 +1,31 @@
++/* { dg-do run } */
++
++void abort (void);
++
++int a, b, c, d;
++long e[9][7][4];
++
++void f()
++{
++  for (; a >= 0; a--)
++    {
++      b = 0;
++      for (; b <= 3; b++)
++	{
++	  c = 0;
++	  for (; c <= 3; c++)
++	    {
++	      int *g = &d;
++	      *g = e[0][0][b] | e[a][b][a];
++	    }
++	}
++    }
++}
++
++int main()
++{
++  f();
++  if (a != -1)
++    abort ();
++  return 0;
++}
+diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
+index 92fc1c7d734..934897af691 100644
+--- a/gcc/tree-ssa-loop-ivopts.cc
++++ b/gcc/tree-ssa-loop-ivopts.cc
+@@ -7630,7 +7630,22 @@ rewrite_use_address (struct ivopts_data *data,
+ 				   true, GSI_SAME_STMT);
+     }
+   else
+-    copy_ref_info (ref, *use->op_p);
++    {
++      /* When we end up confused enough and have no suitable base but
++	 stuffed everything to index2 use a LEA for the address and
++	 create a plain MEM_REF to avoid basing a memory reference
++	 on address zero which create_mem_ref_raw does as fallback.  */
++      if (TREE_CODE (ref) == TARGET_MEM_REF
++	  && TMR_INDEX2 (ref) != NULL_TREE
++	  && integer_zerop (TREE_OPERAND (ref, 0)))
++	{
++	  ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
++	  ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
++					  true, GSI_SAME_STMT);
++	  ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
++	}
++      copy_ref_info (ref, *use->op_p);
++    }
+ 
+   *use->op_p = ref;
+ }
+--
+2.45.2
+
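In plain C terms, the shape of the fix can be sketched as follows (a hedged illustration, not the actual GIMPLE): materialize the full address first, then load through an ordinary pointer, so no access is written with a literal zero base.

#include <stdio.h>

/* Before: the access itself was written like *(long *)((char *)0 + iv1 + iv2*4),
   i.e. a memory form based at address zero.  After: the address is built
   first (a LEA), then dereferenced as a plain pointer.  */
long
load_via_lea (char *iv1, unsigned long iv2)
{
  long *p = (long *) (iv1 + iv2 * 4);
  return *p;
}

int
main (void)
{
  long x = 42;
  printf ("%ld\n", load_via_lea ((char *) &x, 0));  /* prints 42 */
  return 0;
}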
_service:tar_scm:0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
Added
@@ -0,0 +1,151 @@ +From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:08 +0100 +Subject: PATCH 092/157 BackportSME aarch64: Rename hard_fp_offset to + bytes_above_hard_fp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6 + +Similarly to the previous locals_offset patch, hard_fp_offset +was described as: + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. This value is always a multiple of + STACK_BOUNDARY. */ + poly_int64 hard_fp_offset; + +which again took an “upside-down” view: higher offsets meant lower +addresses. This patch renames the field to bytes_above_hard_fp instead. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename + to... + (aarch64_frame::bytes_above_hard_fp): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_expand_prologue): Update accordingly. + (aarch64_initial_elimination_offset): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 992f71bbd..67199a026 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void) + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.hard_fp_offset ++ frame.bytes_above_hard_fp + = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ && known_eq (frame.bytes_above_hard_fp, const_size)) + { + /* Simple, small frame with no data below the saved registers. + +@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void) + case that it hardly seems worth the effort though. 
*/ + && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca +- && frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset)) ++ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset)) + { + /* Frame with small area below the saved registers: + +@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void) + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.hard_fp_offset ++ frame.initial_adjust = (frame.bytes_above_hard_fp + + frame.below_hard_fp_saved_regs_size); + frame.final_adjust = frame.bytes_below_saved_regs; + } +- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset) ++ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset) + { + /* Frame with large area below the saved registers, or with SVE saves, + but with a small area above: +@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.callee_adjust = const_fp_offset; ++ frame.callee_adjust = const_above_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = frame.hard_fp_offset; ++ frame.initial_adjust = frame.bytes_above_hard_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void) + { + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); ++ - frame.bytes_above_hard_fp); + gcc_assert (known_ge (chain_offset, 0)); + + if (callee_adjust == 0) +@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return frame.hard_fp_offset; ++ return frame.bytes_above_hard_fp; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.bytes_above_locals; ++ return frame.bytes_above_hard_fp - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2acff9a96..0f7822c3d 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_locals; + +- /* Offset from the base of the frame (incomming SP) to the +- hard_frame_pointer. This value is always a multiple of ++ /* The number of bytes between the hard_frame_pointer and the top of ++ the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 hard_fp_offset; ++ poly_int64 bytes_above_hard_fp; + + /* The size of the frame. This value is the offset from base of the + frame (incomming SP) to the stack_pointer. This value is always +-- +2.33.0 +
_service:tar_scm:0191-LoongArch-Change-OSDIR-for-distribution.patch
Added
@@ -0,0 +1,25 @@
+From 25423cf92026221b7c8798533c40d3e6269a1d7c Mon Sep 17 00:00:00 2001
+From: Peng Fan <fanpeng@loongson.cn>
+Date: Thu, 31 Oct 2024 02:01:49 +0000
+Subject: [PATCH] LoongArch: Change OSDIR for distribution
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/t-linux | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
+index 7cd7cde25..1d1f42596 100644
+--- a/gcc/config/loongarch/t-linux
++++ b/gcc/config/loongarch/t-linux
+@@ -28,4 +28,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
+     MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f)
+     MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s)
+ 
++else
++    MULTILIB_OSDIRNAMES := ../lib64
++
+ endif
+--
+2.45.2
+
_service:tar_scm:0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
Added
@@ -0,0 +1,37 @@
+From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:08 +0100
+Subject: [PATCH 093/157] [Backport][SME] aarch64: Tweak frame_size comment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3
+
+This patch fixes another case in which a value was described with
+an “upside-down” view.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
+---
+ gcc/config/aarch64/aarch64.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 0f7822c3d..39abca051 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   poly_int64 bytes_above_hard_fp;
+ 
+-  /* The size of the frame.  This value is the offset from base of the
+-     frame (incomming SP) to the stack_pointer.  This value is always
++  /* The size of the frame, i.e. the number of bytes between the bottom
++     of the outgoing arguments and the incoming SP.  This value is always
+      a multiple of STACK_BOUNDARY.  */
+   poly_int64 frame_size;
+ 
+--
+2.33.0
+
_service:tar_scm:0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
Added
@@ -0,0 +1,198 @@ +From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:09 +0100 +Subject: PATCH 094/157 BackportSME aarch64: Measure reg_offset from the + bottom of the frame + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54 + +reg_offset was measured from the bottom of the saved register area. +This made perfect sense with the original layout, since the bottom +of the saved register area was also the hard frame pointer address. +It became slightly less obvious with SVE, since we save SVE +registers below the hard frame pointer, but it still made sense. + +However, if we want to allow different frame layouts, it's more +convenient and obvious to measure reg_offset from the bottom of +the frame. After previous patches, it's also a slight simplification +in its own right. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame): Add comment above + reg_offset. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets + from the bottom of the frame, rather than the bottom of the saved + register area. Measure reg_offset from the bottom of the frame + rather than the bottom of the saved register area. + (aarch64_save_callee_saves): Update accordingly. + (aarch64_restore_callee_saves): Likewise. + (aarch64_get_separate_components): Likewise. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 3 ++ + 2 files changed, 27 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 67199a026..df8a83b04 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void) + static void + aarch64_layout_frame (void) + { +- poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); +@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + +- frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ poly_int64 offset = crtl->outgoing_args_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.bytes_below_saved_regs = offset; + + /* Now assign stack slots for the registers. 
Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void) + offset += BYTES_PER_SVE_PRED; + } + +- if (maybe_ne (offset, 0)) ++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; ++ if (maybe_ne (saved_prs_size, 0)) + { + /* If we have any vector registers to save above the predicate registers, + the offset of the vector register save slots need to be a multiple +@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { +- if (known_le (offset, vector_save_size)) +- offset = vector_save_size; +- else if (known_le (offset, vector_save_size * 2)) +- offset = vector_save_size * 2; ++ if (known_le (saved_prs_size, vector_save_size)) ++ offset = frame.bytes_below_saved_regs + vector_save_size; ++ else if (known_le (saved_prs_size, vector_save_size * 2)) ++ offset = frame.bytes_below_saved_regs + vector_save_size * 2; + else + gcc_unreachable (); + } +@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- bool saves_below_hard_fp_p = maybe_ne (offset, 0); +- frame.below_hard_fp_saved_regs_size = offset; +- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; ++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p ++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ frame.bytes_below_hard_fp = offset; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset; ++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; ++ poly_int64 varargs_and_saved_regs_size ++ = frame.saved_regs_size + frame.saved_varargs_size; + + poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size +@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void) + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection + && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, 0)) ++ && known_eq (offset, frame.bytes_below_saved_regs)) + continue; + + /* Get the offset relative to the register we'll use. 
*/ + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + rtx reg = gen_rtx_REG (mode, regno); + poly_int64 offset = frame.reg_offsetregno; + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); + if (frame_pointer_needed) +- offset2 -= frame.below_hard_fp_saved_regs_size; +- else +- offset2 += frame.bytes_below_saved_regs; ++ offset2 -= frame.bytes_below_hard_fp; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p + && known_eq (frame.below_hard_fp_saved_regs_size, 0)) + { +- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; ++ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM ++ - frame.bytes_below_saved_regs); + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 39abca051..f340237d0 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune; + #ifdef HAVE_POLY_INT_H + struct GTY (()) aarch64_frame + { ++ /* The offset from the bottom of the static frame (the bottom of the ++ outgoing arguments) of each register save slot, or -2 if no save is ++ needed. */ + poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; + + /* The number of extra stack bytes taken up by register varargs. +-- +2.33.0 +
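With reg_offset measured from the frame bottom, the SP-relative offset drops one correction term; a sketch with invented numbers (not GCC code):

#include <stdio.h>

int
main (void)
{
  long bytes_below_saved_regs = 64;
  long slot_in_save_area = 16;
  /* New convention: reg_offset already includes the area below the saves.  */
  long reg_offset = bytes_below_saved_regs + slot_in_save_area;
  long bytes_below_sp = 64;

  long offset = reg_offset - bytes_below_sp;  /* no "+ bytes_below_saved_regs" */
  printf ("str reg, [sp, #%ld]\n", offset);   /* same #16 as before the patch */
  return 0;
}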
_service:tar_scm:0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
Added
@@ -0,0 +1,58 @@
+From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:09 +0100
+Subject: [PATCH 095/157] [Backport][SME] aarch64: Simplify top of frame
+ allocation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1
+
+After previous patches, it no longer really makes sense to allocate
+the top of the frame in terms of varargs_and_saved_regs_size and
+saved_regs_and_above.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify
+	the allocation of the top of the frame.
+---
+ gcc/config/aarch64/aarch64.cc | 23 ++++++++---------------
+ 1 file changed, 8 insertions(+), 15 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index df8a83b04..3329aa364 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void)
+ 
+   frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+ 
+-  poly_int64 varargs_and_saved_regs_size
+-    = frame.saved_regs_size + frame.saved_varargs_size;
+-
+-  poly_int64 saved_regs_and_above
+-    = aligned_upper_bound (varargs_and_saved_regs_size
+-			   + get_frame_size (),
+-			   STACK_BOUNDARY / BITS_PER_UNIT);
+-
+-  frame.bytes_above_hard_fp
+-    = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
++  offset += get_frame_size ();
++  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++  auto top_of_locals = offset;
+ 
+-  /* Both these values are already aligned.  */
+-  gcc_assert (multiple_p (frame.bytes_below_saved_regs,
+-			  STACK_BOUNDARY / BITS_PER_UNIT));
+-  frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
++  offset += frame.saved_varargs_size;
++  gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++  frame.frame_size = offset;
+ 
+-  frame.bytes_above_locals = frame.saved_varargs_size;
++  frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
++  frame.bytes_above_locals = frame.frame_size - top_of_locals;
+ 
+   frame.initial_adjust = 0;
+   frame.final_adjust = 0;
+--
+2.33.0
+
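The simplified code is one running offset accumulated from the bottom of the frame; the standalone sketch below (invented sizes, assuming a 16-byte STACK_BOUNDARY / BITS_PER_UNIT) mirrors its shape.

#include <stdio.h>

static long
align_up (long x, long a)
{
  return (x + a - 1) & -a;
}

int
main (void)
{
  long offset = 64;                        /* bytes_below_saved_regs */
  offset += 96;                            /* callee-saved registers */
  long saved_regs_size = offset - 64;
  offset += 40;                            /* get_frame_size (): locals */
  offset = align_up (offset, 16);
  long top_of_locals = offset;
  offset += 16;                            /* saved_varargs_size */
  long frame_size = offset;
  printf ("saved_regs=%ld top_of_locals=%ld frame_size=%ld\n",
          saved_regs_size, top_of_locals, frame_size);
  printf ("bytes_above_locals=%ld\n", frame_size - top_of_locals);
  return 0;
}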
View file
_service:tar_scm:0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
Added
@@ -0,0 +1,41 @@
+From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:10 +0100
+Subject: [PATCH 096/157] [Backport][SME] aarch64: Minor initial adjustment
+ tweak
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c
+
+This patch just changes a calculation of initial_adjust
+to one that makes it slightly more obvious that the total
+adjustment is frame.frame_size.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak
+	calculation of initial_adjust for frames in which all saves
+	are SVE saves.
+---
+ gcc/config/aarch64/aarch64.cc | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 3329aa364..72604dd9d 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void)
+     {
+       /* Frame in which all saves are SVE saves:
+
+-           sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
++           sub sp, sp, frame_size - bytes_below_saved_regs
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+-      frame.initial_adjust = (frame.bytes_above_hard_fp
+-                              + frame.below_hard_fp_saved_regs_size);
++      frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+   else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
+--
+2.33.0
View file
_service:tar_scm:0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
Added
@@ -0,0 +1,128 @@
+From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:10 +0100
+Subject: [PATCH 097/157] [Backport][SME] aarch64: Tweak stack clash boundary
+ condition
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966
+
+The AArch64 ABI says that, when stack clash protection is used,
+there can be a maximum of 1KiB of unprobed space at sp on entry
+to a function.  Therefore, we need to probe when allocating
+>= guard_size - 1KiB of data (>= rather than >).  This is what
+GCC does.
+
+If an allocation is exactly guard_size bytes, it is enough to allocate
+those bytes and probe once at offset 1024.  It isn't possible to use a
+single probe at any other offset: higher would complicate later code,
+by leaving more unprobed space than usual, while lower would risk
+leaving an entire page unprobed.  For simplicity, the code probes all
+allocations at offset 1024.
+
+Some register saves also act as probes.  If we need to allocate
+more space below the last such register save probe, we need to
+probe the allocation if it is > 1KiB.  Again, this allocation is
+then sometimes (but not always) probed at offset 1024.  This sort of
+allocation is currently only used for outgoing arguments, which are
+rarely this big.
+
+However, the code also probed if this final outgoing-arguments
+allocation was == 1KiB, rather than just > 1KiB.  This isn't
+necessary, since the register save then probes at offset 1024
+as required.  Continuing to probe allocations of exactly 1KiB
+would complicate later patches.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
+	Don't probe final allocations that are exactly 1KiB in size (after
+	unprobed space above the final allocation has been deducted).
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-check-prologue-17.c: New test.
+---
+ gcc/config/aarch64/aarch64.cc                 |  4 +-
+ .../aarch64/stack-check-prologue-17.c         | 55 +++++++++++++++++++
+ 2 files changed, 58 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 72604dd9d..ba92a23a7 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+   HOST_WIDE_INT guard_size
+     = 1 << param_stack_clash_protection_guard_size;
+   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
++  HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
++  gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+   HOST_WIDE_INT min_probe_threshold
+     = (final_adjustment_p
+-       ? guard_used_by_caller
++       ? guard_used_by_caller + byte_sp_alignment
+        : guard_size - guard_used_by_caller);
+   /* When doing the final adjustment for the outgoing arguments, take into
+      account any unprobed space there is above the current SP.  There are
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+new file mode 100644
+index 000000000..0d8a25d73
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -0,0 +1,55 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++**	...
++**	str	x30, \[sp\]
++**	sub	sp, sp, #1024
++**	cbnz	w0, .*
++**	bl	g
++**	...
++*/
++int test1(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++    }
++  g();
++  return 1;
++}
++
++/*
++** test2:
++**	...
++**	str	x30, \[sp\]
++**	sub	sp, sp, #1040
++**	str	xzr, \[sp\]
++**	cbnz	w0, .*
++**	bl	g
++**	...
++*/
++int test2(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x);
++    }
++  g();
++  return 1;
++}
+--
+2.33.0
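The new test expectations follow directly from the threshold arithmetic. A self-contained check of the numbers, assuming the test's guard-size parameter (2^12) and the 1KiB caller guard named in the patch:

#include <cassert>

int
main ()
{
  const int guard_size = 1 << 12;        // --param ...-guard-size=12
  const int guard_used_by_caller = 1024; // STACK_CLASH_CALLER_GUARD
  const int byte_sp_alignment = 16;      // STACK_BOUNDARY / BITS_PER_UNIT

  // Non-final allocations: probe when size >= guard_size - 1KiB.
  assert (guard_size - guard_used_by_caller == 3072);

  // Final (outgoing-argument) allocation, after this patch: probe only
  // when size > 1KiB, i.e. >= 1040 for 16-byte-aligned sizes.
  const int min_probe_threshold = guard_used_by_caller + byte_sp_alignment;
  assert (1024 < min_probe_threshold);  // test1: sub sp, sp, #1024 - no probe
  assert (1040 >= min_probe_threshold); // test2: sub sp, sp, #1040 - probed
  return 0;
}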
View file
_service:tar_scm:0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
Added
@@ -0,0 +1,409 @@ +From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:11 +0100 +Subject: PATCH 098/157 BackportSME aarch64: Put LR save probe in first + 16 bytes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c + +-fstack-clash-protection uses the save of LR as a probe for the next +allocation. The next allocation could be: + +* another part of the static frame, e.g. when allocating SVE save slots + or outgoing arguments + +* an alloca in the same function + +* an allocation made by a callee function + +However, when -fomit-frame-pointer is used, the LR save slot is placed +above the other GPR save slots. It could therefore be up to 80 bytes +above the base of the GPR save area (which is also the hard fp address). + +aarch64_allocate_and_probe_stack_space took this into account when +deciding how much subsequent space could be allocated without needing +a probe. However, it interacted badly with: + + /* If doing a small final adjustment, we always probe at offset 0. + This is done to avoid issues when LR is not at position 0 or when + the final adjustment is smaller than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +which forces any allocation that is smaller than the guard page size +to be probed at offset 0 rather than the usual offset 1024. It was +therefore possible to construct cases in which we had: + +* a probe using LR at SP + 80 bytes (or some other value >= 16) +* an allocation of the guard page size - 16 bytes +* a probe at SP + 0 + +which allocates guard page size + 64 consecutive unprobed bytes. + +This patch requires the LR probe to be in the first 16 bytes of the +save area when stack clash protection is active. Doing it +unconditionally would cause code-quality regressions. + +Putting LR before other registers prevents push/pop allocation +when shadow call stacks are enabled, since LR is restored +separately from the other callee-saved registers. + +The new comment doesn't say that the probe register is required +to be LR, since a later patch removes that restriction. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that + the LR save slot is in the first 16 bytes of the register save area. + Only form STP/LDP push/pop candidates if both registers are valid. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + when LR was not in the first 16 bytes. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-18.c: New test. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. 
+---
+ gcc/config/aarch64/aarch64.cc                 |  72 ++++++-------
+ .../aarch64/stack-check-prologue-18.c         | 100 ++++++++++++++++++
+ .../aarch64/stack-check-prologue-19.c         | 100 ++++++++++++++++++
+ .../aarch64/stack-check-prologue-20.c         |   3 +
+ 4 files changed, 233 insertions(+), 42 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index ba92a23a7..1ba4c2f89 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void)
+   bool saves_below_hard_fp_p
+     = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+   frame.bytes_below_hard_fp = offset;
++
++  auto allocate_gpr_slot = [&](unsigned int regno)
++    {
++      frame.reg_offset[regno] = offset;
++      if (frame.wb_push_candidate1 == INVALID_REGNUM)
++        frame.wb_push_candidate1 = regno;
++      else if (frame.wb_push_candidate2 == INVALID_REGNUM)
++        frame.wb_push_candidate2 = regno;
++      offset += UNITS_PER_WORD;
++    };
++
+   if (frame.emit_frame_chain)
+     {
+       /* FP and LR are placed in the linkage record.  */
+-      frame.reg_offset[R29_REGNUM] = offset;
+-      frame.wb_push_candidate1 = R29_REGNUM;
+-      frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
+-      frame.wb_push_candidate2 = R30_REGNUM;
+-      offset += 2 * UNITS_PER_WORD;
++      allocate_gpr_slot (R29_REGNUM);
++      allocate_gpr_slot (R30_REGNUM);
+     }
++  else if (flag_stack_clash_protection
++           && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
++    /* Put the LR save slot first, since it makes a good choice of probe
++       for stack clash purposes.  The idea is that the link register usually
++       has to be saved before a call anyway, and so we lose little by
++       stopping it from being individually shrink-wrapped.  */
++    allocate_gpr_slot (R30_REGNUM);
+
+   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+-      {
+-        frame.reg_offset[regno] = offset;
+-        if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-          frame.wb_push_candidate1 = regno;
+-        else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+-          frame.wb_push_candidate2 = regno;
+-        offset += UNITS_PER_WORD;
+-      }
++      allocate_gpr_slot (regno);
+
+   poly_int64 max_int_offset = offset;
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void)
+      max_push_offset to 0, because no registers are popped at this time,
+      so callee_adjust cannot be adjusted.  */
+   HOST_WIDE_INT max_push_offset = 0;
+-  if (frame.wb_pop_candidate2 != INVALID_REGNUM)
+-    max_push_offset = 512;
+-  else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
+-    max_push_offset = 256;
++  if (frame.wb_pop_candidate1 != INVALID_REGNUM)
++    {
++      if (frame.wb_pop_candidate2 != INVALID_REGNUM)
++        max_push_offset = 512;
++      else
++        max_push_offset = 256;
++    }
+
+   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
+   HOST_WIDE_INT const_saved_regs_size;
+@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+     = (final_adjustment_p
+        ? guard_used_by_caller + byte_sp_alignment
+        : guard_size - guard_used_by_caller);
+-  /* When doing the final adjustment for the outgoing arguments, take into
+-     account any unprobed space there is above the current SP.  There are
+-     two cases:
+-
+-     - When saving SVE registers below the hard frame pointer, we force
+-       the lowest save to take place in the prologue before doing the final
+-       adjustment (i.e. we don't allow the save to be shrink-wrapped).
+-       This acts as a probe at SP, so there is no unprobed space.
+-
+-     - When there are no SVE register saves, we use the store of the link
+-       register as a probe.  We can't assume that LR was saved at position 0
+-       though, so treat any space below it as unprobed.  */
+-  if (final_adjustment_p
+-      && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+-    {
+-      poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
+-                              - frame.bytes_below_saved_regs);
+-      if (known_ge (lr_offset, 0))
+-        min_probe_threshold -= lr_offset.to_constant ();
+-      else
+-        gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
+-    }
+-
+   poly_int64 frame_size = frame.frame_size;
+
+   /* We should always have a positive probe threshold.  */
+@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+   if (final_adjustment_p && rounded_size != 0)
+     min_probe_threshold = 0;
+   /* If doing a small final adjustment, we always probe at offset 0.
+-     This is done to avoid issues when LR is not at position 0 or when
+-     the final adjustment is smaller than the probing offset.  */
++     This is done to avoid issues when the final adjustment is smaller
++     than the probing offset.  */
+   else if (final_adjustment_p && rounded_size == 0)
+     residual_probe_offset = 0;
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+new file mode 100644
+index 000000000..82447d20f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -0,0 +1,100 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
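The unprobed-gap scenario in the commit message can be checked numerically; this small program assumes the 4KiB guard page used in the description:

#include <cassert>

int
main ()
{
  const int guard_size = 4096;            // assumed guard page size
  const int lr_probe_offset = 80;         // LR slot up to 80 bytes above the
                                          // bottom of the GPR save area
  const int allocation = guard_size - 16; // small enough to probe at offset 0
  // Distance between the offset-0 probe and the earlier LR-save probe:
  // this is the run of consecutive unprobed bytes the patch eliminates.
  assert (allocation + lr_probe_offset == guard_size + 64);
  return 0;
}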
View file
_service:tar_scm:0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
Added
@@ -0,0 +1,126 @@
+From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:11 +0100
+Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final
+ frame allocation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae
+
+Previous patches ensured that the final frame allocation only needs
+a probe when the size is strictly greater than 1KiB.  It's therefore
+safe to use the normal 1024 probe offset in all cases.
+
+The main motivation for doing this is to simplify the code and
+remove the number of special cases.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
+	Always probe the residual allocation at offset 1024, asserting
+	that that is in range.
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
+	to be at offset 1024 rather than offset 0.
+	* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
+	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                    | 12 ++++--------
+ .../gcc.target/aarch64/stack-check-prologue-17.c |  2 +-
+ .../gcc.target/aarch64/stack-check-prologue-18.c |  4 ++--
+ .../gcc.target/aarch64/stack-check-prologue-19.c |  4 ++--
+ 4 files changed, 9 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1ba4c2f89..6d835dc8f 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+      are still safe.  */
+   if (residual)
+     {
+-      HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
++      gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
++
+       /* If we're doing final adjustments, and we've done any full page
+          allocations then any residual needs to be probed.  */
+       if (final_adjustment_p && rounded_size != 0)
+         min_probe_threshold = 0;
+-      /* If doing a small final adjustment, we always probe at offset 0.
+-         This is done to avoid issues when the final adjustment is smaller
+-         than the probing offset.  */
+-      else if (final_adjustment_p && rounded_size == 0)
+-        residual_probe_offset = 0;
+
+       aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
+       if (residual >= min_probe_threshold)
+@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+                     HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
+                     "\n", residual);
+
+-          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+-                                           residual_probe_offset));
++          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                           guard_used_by_caller));
+           emit_insn (gen_blockage ());
+         }
+     }
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+index 0d8a25d73..f0ec13897 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -33,7 +33,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+index 82447d20f..6383bec5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -9,7 +9,7 @@ void g();
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #4064
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+index 73ac3e4e4..562039b5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+@@ -9,7 +9,7 @@ void g();
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #4064
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+--
+2.33.0
View file
_service:tar_scm:0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
Added
@@ -0,0 +1,280 @@ +From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:12 +0100 +Subject: PATCH 100/157 BackportSME aarch64: Explicitly record probe + registers in frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f + +The stack frame is currently divided into three areas: + +A: the area above the hard frame pointer +B: the SVE saves below the hard frame pointer +C: the outgoing arguments + +If the stack frame is allocated in one chunk, the allocation needs a +probe if the frame size is >= guard_size - 1KiB. In addition, if the +function is not a leaf function, it must probe an address no more than +1KiB above the outgoing SP. We ensured the second condition by + +(1) using single-chunk allocations for non-leaf functions only if + the link register save slot is within 512 bytes of the bottom + of the frame; and + +(2) using the link register save as a probe (meaning, for instance, + that it can't be individually shrink wrapped) + +If instead the stack is allocated in multiple chunks, then: + +* an allocation involving only the outgoing arguments (C above) requires + a probe if the allocation size is > 1KiB + +* any other allocation requires a probe if the allocation size + is >= guard_size - 1KiB + +* second and subsequent allocations require the previous allocation + to probe at the bottom of the allocated area, regardless of the size + of that previous allocation + +The final point means that, unlike for single allocations, +it can be necessary to have both a non-SVE register probe and +an SVE register probe. For example: + +* allocate A, probe using a non-SVE register save +* allocate B, probe using an SVE register save +* allocate C + +The non-SVE register used in this case was again the link register. +It was previously used even if the link register save slot was some +bytes above the bottom of the non-SVE register saves, but an earlier +patch avoided that by putting the link register save slot first. + +As a belt-and-braces fix, this patch explicitly records which +probe registers we're using and allows the non-SVE probe to be +whichever register comes first (as for SVE). + +The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) + (aarch64_frame::hard_fp_save_and_probe): New fields. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. + Rather than asserting that a leaf function saves LR, instead assert + that a leaf function saves something. + (aarch64_get_separate_components): Prevent the chosen probe + registers from being individually shrink-wrapped. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + probe registers that aren't at the bottom of the previous allocation. + +gcc/testsuite/ + * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. 
+---
+ gcc/config/aarch64/aarch64.cc                 | 68 +++++++++++++++----
+ gcc/config/aarch64/aarch64.h                  |  8 +++
+ .../aarch64/sve/pcs/stack_clash_3.c           |  6 +-
+ 3 files changed, 64 insertions(+), 18 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 6d835dc8f..dd80ceba8 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void)
+        && !crtl->abi->clobbers_full_reg_p (regno))
+       frame.reg_offset[regno] = SLOT_REQUIRED;
+
+-  /* With stack-clash, LR must be saved in non-leaf functions.  The saving of
+-     LR counts as an implicit probe which allows us to maintain the invariant
+-     described in the comment at expand_prologue.  */
+-  gcc_assert (crtl->is_leaf
+-              || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+
+   poly_int64 offset = crtl->outgoing_args_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+   frame.bytes_below_saved_regs = offset;
++  frame.sve_save_and_probe = INVALID_REGNUM;
+
+   /* Now assign stack slots for the registers.  Start with the predicate
+      registers, since predicate LDR and STR have a relatively small
+@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void)
+   for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+       {
++        if (frame.sve_save_and_probe == INVALID_REGNUM)
++          frame.sve_save_and_probe = regno;
+         frame.reg_offset[regno] = offset;
+         offset += BYTES_PER_SVE_PRED;
+       }
+@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          if (frame.sve_save_and_probe == INVALID_REGNUM)
++            frame.sve_save_and_probe = regno;
+           frame.reg_offset[regno] = offset;
+           offset += vector_save_size;
+         }
+@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void)
+   frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+   bool saves_below_hard_fp_p
+     = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++  gcc_assert (!saves_below_hard_fp_p
++              || (frame.sve_save_and_probe != INVALID_REGNUM
++                  && known_eq (frame.reg_offset[frame.sve_save_and_probe],
++                               frame.bytes_below_saved_regs)));
++
+   frame.bytes_below_hard_fp = offset;
++  frame.hard_fp_save_and_probe = INVALID_REGNUM;
+
+   auto allocate_gpr_slot = [&](unsigned int regno)
+     {
++      if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
++        frame.hard_fp_save_and_probe = regno;
+       frame.reg_offset[regno] = offset;
+       if (frame.wb_push_candidate1 == INVALID_REGNUM)
+         frame.wb_push_candidate1 = regno;
+@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
++            frame.hard_fp_save_and_probe = regno;
+           /* If there is an alignment gap between integer and fp callee-saves,
+              allocate the last fp register to it if possible.  */
+           if (regno == last_fp_reg
+@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void)
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+   frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
++  gcc_assert (known_eq (frame.saved_regs_size,
++                        frame.below_hard_fp_saved_regs_size)
++              || (frame.hard_fp_save_and_probe != INVALID_REGNUM
++                  && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
++                               frame.bytes_below_hard_fp)));
++
++  /* With stack-clash, a register must be saved in non-leaf functions.
++     The saving of the bottommost register counts as an implicit probe,
++     which allows us to maintain the invariant described in the comment
++     at expand_prologue.  */
++  gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
+
+   offset += get_frame_size ();
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void)
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+
++  /* The frame is allocated in pieces, with each non-final piece
++     including a register save at offset 0 that acts as a probe for
++     the following piece.  In addition, the save of the bottommost register
++     acts as a probe for callees and allocas.  Roll back any probes that
++     aren't needed.
++
++     A probe isn't needed if it is associated with the final allocation
++     (including callees and allocas) that happens before the epilogue is
++     executed.  */
++  if (crtl->is_leaf
++      && !cfun->calls_alloca
++      && known_eq (frame.final_adjust, 0))
++    {
++      if (maybe_ne (frame.sve_callee_adjust, 0))
++        frame.sve_save_and_probe = INVALID_REGNUM;
++      else
++        frame.hard_fp_save_and_probe = INVALID_REGNUM;
++    }
++
+   /* Make sure the individual adjustments add up to the full frame size.  */
+   gcc_assert (known_eq (frame.initial_adjust
+                         + frame.callee_adjust
+@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void)
+
+       poly_int64 offset = frame.reg_offset[regno];
+
+-      /* If the register is saved in the first SVE save slot, we use
+-         it as a stack probe for -fstack-clash-protection.  */
+-      if (flag_stack_clash_protection
+-          && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+-          && known_eq (offset, frame.bytes_below_saved_regs))
+-        continue;
+-
+       /* Get the offset relative to the register we'll use.  */
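A condensed model of the new bookkeeping (the data structures here are assumptions, not GCC's): the first register saved in each area is remembered, so its save can double as the probe for that area and is then excluded from shrink-wrapping.

#include <vector>

const unsigned INVALID_REGNUM = ~0u;

struct probes
{
  unsigned sve_save_and_probe;      // probes the below-hard-FP area
  unsigned hard_fp_save_and_probe;  // probes the GPR/FPR save area
};

// SVE_SAVES and GPR_FPR_SAVES list the saved registers in offset order,
// so front() is the bottommost save in each area.
probes
record_probes (const std::vector<unsigned> &sve_saves,
               const std::vector<unsigned> &gpr_fpr_saves)
{
  probes p = { INVALID_REGNUM, INVALID_REGNUM };
  if (!sve_saves.empty ())
    p.sve_save_and_probe = sve_saves.front ();
  if (!gpr_fpr_saves.empty ())
    p.hard_fp_save_and_probe = gpr_fpr_saves.front ();
  return p;
}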
View file
_service:tar_scm:0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
Added
@@ -0,0 +1,160 @@
+From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:12 +0100
+Subject: [PATCH 101/157] [Backport][SME] aarch64: Remove
+ below_hard_fp_saved_regs_size
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1
+
+After previous patches, it's no longer necessary to store
+saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
+All measurements instead use the top or bottom of the frame as
+reference points.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
+	(aarch64_frame::below_hard_fp_saved_regs_size): Delete.
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly.
+---
+ gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++-------------------
+ gcc/config/aarch64/aarch64.h  |  7 ------
+ 2 files changed, 21 insertions(+), 31 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index dd80ceba8..0894ed325 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void)
+
+   /* OFFSET is now the offset of the hard frame pointer from the bottom
+      of the callee save area.  */
+-  frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  bool saves_below_hard_fp_p
+-    = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++  auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
++  bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
+   gcc_assert (!saves_below_hard_fp_p
+               || (frame.sve_save_and_probe != INVALID_REGNUM
+                   && known_eq (frame.reg_offset[frame.sve_save_and_probe],
+@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void)
+
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+-  frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  gcc_assert (known_eq (frame.saved_regs_size,
+-                        frame.below_hard_fp_saved_regs_size)
++  auto saved_regs_size = offset - frame.bytes_below_saved_regs;
++  gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
+               || (frame.hard_fp_save_and_probe != INVALID_REGNUM
+                   && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
+                                frame.bytes_below_hard_fp)));
+@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void)
+      The saving of the bottommost register counts as an implicit probe,
+      which allows us to maintain the invariant described in the comment
+      at expand_prologue.  */
+-  gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
++  gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
+
+   offset += get_frame_size ();
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void)
+
+   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
+   HOST_WIDE_INT const_saved_regs_size;
+-  if (known_eq (frame.saved_regs_size, 0))
++  if (known_eq (saved_regs_size, 0))
+     frame.initial_adjust = frame.frame_size;
+   else if (frame.frame_size.is_constant (&const_size)
+            && const_size < max_push_offset
+@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void)
+       frame.callee_adjust = const_size;
+     }
+   else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
+-           && frame.saved_regs_size.is_constant (&const_saved_regs_size)
++           && saved_regs_size.is_constant (&const_saved_regs_size)
+            && const_below_saved_regs + const_saved_regs_size < 512
+            /* We could handle this case even with data below the saved
+               registers, provided that that data left us with valid offsets
+@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void)
+       frame.initial_adjust = frame.frame_size;
+     }
+   else if (saves_below_hard_fp_p
+-           && known_eq (frame.saved_regs_size,
+-                        frame.below_hard_fp_saved_regs_size))
++           && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
+     {
+       /* Frame in which all saves are SVE saves:
+
+@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void)
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+       frame.callee_adjust = const_above_fp;
+-      frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
++      frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+   else
+@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void)
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+       frame.initial_adjust = frame.bytes_above_hard_fp;
+-      frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
++      frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+
+@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno)
+        |  local variables              | <-- frame_pointer_rtx
+        |                               |
+        +-------------------------------+
+-       |  padding                      | \
+-       +-------------------------------+  |
+-       |  callee-saved registers       |  | frame.saved_regs_size
+-       +-------------------------------+  |
+-       |  LR'                          |  |
+-       +-------------------------------+  |
+-       |  FP'                          |  |
+-       +-------------------------------+  |<- hard_frame_pointer_rtx (aligned)
+-       |  SVE vector registers         |  | \
+-       +-------------------------------+  | | below_hard_fp_saved_regs_size
+-       |  SVE predicate registers      | /  /
++       |  padding                      |
++       +-------------------------------+
++       |  callee-saved registers       |
++       +-------------------------------+
++       |  LR'                          |
++       +-------------------------------+
++       |  FP'                          |
++       +-------------------------------+ <-- hard_frame_pointer_rtx (aligned)
++       |  SVE vector registers         |
++       +-------------------------------+
++       |  SVE predicate registers      |
+        +-------------------------------+
+        |  dynamic allocation           |
+        +-------------------------------+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index af480d9e8..292ef2eec 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   HOST_WIDE_INT saved_varargs_size;
+
+-  /* The size of the callee-save registers with a slot in REG_OFFSET.  */
+-  poly_int64 saved_regs_size;
+-
+   /* The number of bytes between the bottom of the static frame (the bottom
+      of the outgoing arguments) and the bottom of the register save area.
+      This value is always a multiple of STACK_BOUNDARY.  */
+   poly_int64 bytes_below_saved_regs;
+
+-  /* The size of the callee-save registers with a slot in REG_OFFSET that
+-     are saved below the hard frame pointer.  */
+-  poly_int64 below_hard_fp_saved_regs_size;
+-
+   /* The number of bytes between the bottom of the static frame (the bottom
+      of the outgoing arguments) and the hard frame pointer.  This value is
+      always a multiple of STACK_BOUNDARY.  */
+--
+2.33.0
View file
_service:tar_scm:0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
Added
@@ -0,0 +1,301 @@
+From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:13 +0100
+Subject: [PATCH 102/157] [Backport][SME] aarch64: Make stack smash canary
+ protect saved registers
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf
+
+AArch64 normally puts the saved registers near the bottom of the frame,
+immediately above any dynamic allocations.  But this means that a
+stack-smash attack on those dynamic allocations could overwrite the
+saved registers without needing to reach as far as the stack smash
+canary.
+
+The same thing could also happen for variable-sized arguments that are
+passed by value, since those are allocated before a call and popped on
+return.
+
+This patch avoids that by putting the locals (and thus the canary) below
+the saved registers when stack smash protection is active.
+
+The patch fixes CVE-2023-4039.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p):
+	New function.
+	(aarch64_layout_frame): Use it to decide whether locals should
+	go above or below the saved registers.
+	(aarch64_expand_prologue): Update stack layout comment.
+	Emit a stack tie after the final adjustment.
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-protector-8.c: New test.
+	* gcc.target/aarch64/stack-protector-9.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                 | 46 +++++++--
+ .../gcc.target/aarch64/stack-protector-8.c    | 95 +++++++++++++++++++
+ .../gcc.target/aarch64/stack-protector-9.c    | 33 +++++++
+ 3 files changed, 168 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 0894ed325..8d4dd2891 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void)
+   return aarch64_use_frame_pointer;
+ }
+
++/* Return true if the current function should save registers above
++   the locals area, rather than below it.  */
++
++static bool
++aarch64_save_regs_above_locals_p ()
++{
++  /* When using stack smash protection, make sure that the canary slot
++     comes between the locals and the saved registers.  Otherwise,
++     it would be possible for a carefully sized smash attack to change
++     the saved registers (particularly LR and FP) without reaching the
++     canary.  */
++  return crtl->stack_protect_guard;
++}
++
+ /* Mark the registers that need to be saved by the callee and calculate
+    the size of the callee-saved registers area and frame record (both FP
+    and LR may be omitted).  */
+@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void)
+   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+   bool frame_related_fp_reg_p = false;
+   aarch64_frame &frame = cfun->machine->frame;
++  poly_int64 top_of_locals = -1;
+
+   frame.emit_frame_chain = aarch64_needs_frame_chain ();
+
+@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void)
+        && !crtl->abi->clobbers_full_reg_p (regno))
+       frame.reg_offset[regno] = SLOT_REQUIRED;
+
++  bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
+
+   poly_int64 offset = crtl->outgoing_args_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++  if (regs_at_top_p)
++    {
++      offset += get_frame_size ();
++      offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++      top_of_locals = offset;
++    }
+   frame.bytes_below_saved_regs = offset;
+   frame.sve_save_and_probe = INVALID_REGNUM;
+
+@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void)
+      at expand_prologue.  */
+   gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
+
+-  offset += get_frame_size ();
+-  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+-  auto top_of_locals = offset;
+-
++  if (!regs_at_top_p)
++    {
++      offset += get_frame_size ();
++      offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++      top_of_locals = offset;
++    }
+   offset += frame.saved_varargs_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+   frame.frame_size = offset;
+
+   frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
++  gcc_assert (known_ge (top_of_locals, 0));
+   frame.bytes_above_locals = frame.frame_size - top_of_locals;
+
+   frame.initial_adjust = 0;
+@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno)
+        |  for register varargs         |
+        |                               |
+        +-------------------------------+
+-       |  local variables              | <-- frame_pointer_rtx
++       |  local variables (1)          | <-- frame_pointer_rtx
+        |                               |
+        +-------------------------------+
+-       |  padding                      |
++       |  padding (1)                  |
+        +-------------------------------+
+        |  callee-saved registers       |
+        +-------------------------------+
+@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno)
+        +-------------------------------+
+        |  SVE predicate registers      |
+        +-------------------------------+
++       |  local variables (2)          |
++       +-------------------------------+
++       |  padding (2)                  |
++       +-------------------------------+
+        |  dynamic allocation           |
+        +-------------------------------+
+        |  padding                      |
+@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno)
+        +-------------------------------+
+        |                               | <-- stack_pointer_rtx (aligned)
+
++   The regions marked (1) and (2) are mutually exclusive.  (2) is used
++   when aarch64_save_regs_above_locals_p is true.
++
+    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
+    but leave frame_pointer_rtx and hard_frame_pointer_rtx
+    unchanged.
+@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void)
+   gcc_assert (known_eq (bytes_below_sp, final_adjust));
+   aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
+                                           !frame_pointer_needed, true);
++  if (emit_frame_chain && maybe_ne (final_adjust, 0))
++    aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+ }
+
+ /* Return TRUE if we can use a simple_return insn.  */
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+new file mode 100644
+index 000000000..e71d820e3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+@@ -0,0 +1,95 @@
++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void g(void *);
++__SVBool_t *h(void *);
++
++/*
++** test1:
++**	sub	sp, sp, #288
++**	stp	x29, x30, \[sp, #?272\]
++**	add	x29, sp, #?272
++**	mrs	(x[0-9]+), tpidr2_el0
++**	ldr	(x[0-9]+), \[\1, #?16\]
++**	str	\2, \[sp, #?264\]
++**	mov	\2, #?0
++**	add	x0, sp, #?8
++**	bl	g
++**	...
++**	mrs	.*
++**	...
++**	bne	.*
++**	...
++**	ldp	x29, x30, \[sp, #?272\]
++**	add	sp, sp, #?288
++**	ret
++**	bl	__stack_chk_fail
++*/
++int test1() {
++  int y[0x40];
++  g(y);
++  return 1;
++}
++
++/*
++** test2:
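The fix is easiest to see as a layout rule. A minimal sketch (illustrative only; the predicate mirrors the patch, the layout columns are a simplification of the diagram above):

// Layouts, top of frame to bottom:
//
//   protector off                   protector on
//   --------------------------      --------------------------
//   locals                          saved regs (FP, LR, ...)
//   saved regs (FP, LR, ...)        locals, canary at the top
//   dynamic allocation              dynamic allocation
//   outgoing args                   outgoing args
//
// With the canary between the locals and the saved registers, an overflow
// that walks upwards through the locals or the dynamic area must corrupt
// the canary before it can reach FP or LR.
bool
save_regs_above_locals_p (bool stack_protect_guard_active)
{
  return stack_protect_guard_active;
}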
View file
_service:tar_scm:0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
Added
@@ -0,0 +1,201 @@
+From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:45:43 +0100
+Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
+
+The prologue/epilogue pass allows the prologue sequence to contain
+jumps.  The sequence is then partitioned into basic blocks using
+find_many_sub_basic_blocks.
+
+This patch treats epilogues in a similar way.  Since only one block
+might need to be split, the patch (re)introduces a find_sub_basic_blocks
+routine to handle a single block.
+
+The new routine hard-codes the assumption that split_block will chain
+the new block immediately after the original block.  The routine doesn't
+try to replicate the fix for PR81030, since that was specific to
+gimple->rtl expansion.
+
+The patch is needed for follow-on aarch64 patches that add conditional
+code to the epilogue.  The tests are part of those patches.
+
+gcc/
+	* cfgbuild.h (find_sub_basic_blocks): Declare.
+	* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
+	split out from...
+	(find_many_sub_basic_blocks): ...here.
+	(find_sub_basic_blocks): New function.
+	* function.cc (thread_prologue_and_epilogue_insns): Handle
+	epilogues that contain jumps.
+---
+ gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
+ gcc/cfgbuild.h  |  1 +
+ gcc/function.cc |  4 +++
+ 3 files changed, 70 insertions(+), 30 deletions(-)
+
+diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
+index 646a06614..58b865f29 100644
+--- a/gcc/cfgbuild.cc
++++ b/gcc/cfgbuild.cc
+@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
+     }
+ }
+
++/* Update the profile information for BB, which was created by splitting
++   an RTL block that had a non-final jump.  */
++
++static void
++update_profile_for_new_sub_basic_block (basic_block bb)
++{
++  edge e;
++  edge_iterator ei;
++
++  bool initialized_src = false, uninitialized_src = false;
++  bb->count = profile_count::zero ();
++  FOR_EACH_EDGE (e, ei, bb->preds)
++    {
++      if (e->count ().initialized_p ())
++        {
++          bb->count += e->count ();
++          initialized_src = true;
++        }
++      else
++        uninitialized_src = true;
++    }
++  /* When some edges are missing with read profile, this is
++     most likely because RTL expansion introduced loop.
++     When profile is guessed we may have BB that is reachable
++     from unlikely path as well as from normal path.
++
++     TODO: We should handle loops created during BB expansion
++     correctly here.  For now we assume all those loop to cycle
++     precisely once.  */
++  if (!initialized_src
++      || (uninitialized_src
++          && profile_status_for_fn (cfun) < PROFILE_GUESSED))
++    bb->count = profile_count::uninitialized ();
++
++  compute_outgoing_frequencies (bb);
++}
++
+ /* Assume that some pass has inserted labels or control flow
+    instructions within a basic block.  Split basic blocks as needed
+    and create edges.  */
+@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
+   if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+     FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
+       {
+-        edge e;
+-        edge_iterator ei;
+-
+         if (STATE (bb) == BLOCK_ORIGINAL)
+           continue;
+         if (STATE (bb) == BLOCK_NEW)
+           {
+-            bool initialized_src = false, uninitialized_src = false;
+-            bb->count = profile_count::zero ();
+-            FOR_EACH_EDGE (e, ei, bb->preds)
+-              {
+-                if (e->count ().initialized_p ())
+-                  {
+-                    bb->count += e->count ();
+-                    initialized_src = true;
+-                  }
+-                else
+-                  uninitialized_src = true;
+-              }
+-            /* When some edges are missing with read profile, this is
+-               most likely because RTL expansion introduced loop.
+-               When profile is guessed we may have BB that is reachable
+-               from unlikely path as well as from normal path.
+-
+-               TODO: We should handle loops created during BB expansion
+-               correctly here.  For now we assume all those loop to cycle
+-               precisely once.  */
+-            if (!initialized_src
+-                || (uninitialized_src
+-                    && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+-              bb->count = profile_count::uninitialized ();
++            update_profile_for_new_sub_basic_block (bb);
++            continue;
+           }
+-        /* If nothing changed, there is no need to create new BBs.  */
+-        else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
++        /* If nothing changed, there is no need to create new BBs.  */
++        if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+           {
+             /* In rare occassions RTL expansion might have mistakely assigned
+                a probabilities different from what is in CFG.  This happens
+@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
+             update_br_prob_note (bb);
+             continue;
+           }
+-
+         compute_outgoing_frequencies (bb);
+       }
+
+   FOR_EACH_BB_FN (bb, cfun)
+     SET_STATE (bb, 0);
+ }
++
++/* Like find_many_sub_basic_blocks, but look only within BB.  */
++
++void
++find_sub_basic_blocks (basic_block bb)
++{
++  basic_block end_bb = bb->next_bb;
++  find_bb_boundaries (bb);
++  if (bb->next_bb == end_bb)
++    return;
++
++  /* Re-scan and wire in all edges.  This expects simple (conditional)
++     jumps at the end of each new basic blocks.  */
++  make_edges (bb, end_bb->prev_bb, 1);
++
++  /* Update branch probabilities.  Expect only (un)conditional jumps
++     to be created with only the forward edges.  */
++  if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
++    {
++      compute_outgoing_frequencies (bb);
++      for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
++        update_profile_for_new_sub_basic_block (bb);
++    }
++}
+diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
+index 85145da7f..53543bb75 100644
+--- a/gcc/cfgbuild.h
++++ b/gcc/cfgbuild.h
+@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
+ extern bool control_flow_insn_p (const rtx_insn *);
+ extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
+ extern void find_many_sub_basic_blocks (sbitmap);
++extern void find_sub_basic_blocks (basic_block);
+
+ #endif /* GCC_CFGBUILD_H */
+diff --git a/gcc/function.cc b/gcc/function.cc
+index ddab43ca4..f4fc211a0 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
+               && returnjump_p (BB_END (e->src)))
+             e->flags &= ~EDGE_FALLTHRU;
+         }
++
++      find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
+     }
+   else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
+     {
+@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
+          set_insn_locations (seq, epilogue_location);
+
+          emit_insn_before (seq, insn);
++
++          find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
+        }
+     }
+
+--
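The counting rule that update_profile_for_new_sub_basic_block factors out can be modelled in isolation. A self-contained sketch, with std::optional standing in for profile_count ("no value" playing the role of an uninitialized count):

#include <optional>
#include <vector>

// Sum the incoming edge counts of a split-off block. The result is
// unknown if no edge had a count, or if some edge lacked one while the
// function had no profile at all (mirroring
// profile_status_for_fn (cfun) < PROFILE_GUESSED).
std::optional<long>
merge_edge_counts (const std::vector<std::optional<long>> &pred_counts,
                   bool profile_absent)
{
  long sum = 0;
  bool initialized_src = false, uninitialized_src = false;
  for (const auto &c : pred_counts)
    {
      if (c.has_value ())
        {
          sum += *c;            // bb->count += e->count ()
          initialized_src = true;
        }
      else
        uninitialized_src = true;
    }
  if (!initialized_src || (uninitialized_src && profile_absent))
    return std::nullopt;        // profile_count::uninitialized ()
  return sum;
}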
View file
_service:tar_scm:0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
Added
@@ -0,0 +1,709 @@
+From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:46:33 +0100
+Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register
+ save order
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0
+
+aarch64_save/restore_callee_saves looped over registers in register
+number order.  This in turn meant that we could only use LDP and STP
+for registers that were consecutive both number-wise and
+offset-wise (after unsaved registers are excluded).
+
+This patch instead builds lists of the registers that we've decided to
+save, in offset order.  We can then form LDP/STP pairs regardless of
+register number order, which in turn means that we can put the LR save
+slot first without losing LDP/STP opportunities.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame): Add vectors that
+	store the list saved GPRs, FPRs and predicate registers.
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize
+	the lists of saved registers.  Use them to choose push candidates.
+	Invalidate pop candidates if we're not going to do a pop.
+	(aarch64_next_callee_save): Delete.
+	(aarch64_save_callee_saves): Take a list of registers,
+	rather than a range.  Make !skip_wb select only write-back
+	candidates.
+	(aarch64_expand_prologue): Update calls accordingly.
+	(aarch64_restore_callee_saves): Take a list of registers,
+	rather than a range.  Always skip pop candidates.  Also skip
+	LR if shadow call stacks are enabled.
+	(aarch64_expand_epilogue): Update calls accordingly.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores
+	to happen in offset order.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                 | 203 +++++++++---------
+ gcc/config/aarch64/aarch64.h                  |   9 +-
+ .../aarch64/sve/pcs/stack_clash_2.c           |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_1024.c      |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_128.c       |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_2048.c      |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_256.c       |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_512.c       |   6 +-
+ 8 files changed, 128 insertions(+), 120 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 8d4dd2891..e10c9d763 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p ()
+ static void
+ aarch64_layout_frame (void)
+ {
+-  int regno, last_fp_reg = INVALID_REGNUM;
++  unsigned regno, last_fp_reg = INVALID_REGNUM;
+   machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
+   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+   bool frame_related_fp_reg_p = false;
+   aarch64_frame &frame = cfun->machine->frame;
+   poly_int64 top_of_locals = -1;
+
++  vec_safe_truncate (frame.saved_gprs, 0);
++  vec_safe_truncate (frame.saved_fprs, 0);
++  vec_safe_truncate (frame.saved_prs, 0);
++
+   frame.emit_frame_chain = aarch64_needs_frame_chain ();
+
+   /* Adjust the outgoing arguments size if required.  Keep it in sync with what
+@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void)
+   for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+       {
++        vec_safe_push (frame.saved_prs, regno);
+         if (frame.sve_save_and_probe == INVALID_REGNUM)
+           frame.sve_save_and_probe = regno;
+         frame.reg_offset[regno] = offset;
+@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void)
+      If we don't have any vector registers to save, and we know how
+      big the predicate save area is, we can just round it up to the
+      next 16-byte boundary.  */
+-  if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
++  if (last_fp_reg == INVALID_REGNUM && offset.is_constant ())
+     offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+   else
+     {
+@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void)
+     }
+
+   /* If we need to save any SVE vector registers, add them next.  */
+-  if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
++  if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          vec_safe_push (frame.saved_fprs, regno);
+           if (frame.sve_save_and_probe == INVALID_REGNUM)
+             frame.sve_save_and_probe = regno;
+           frame.reg_offset[regno] = offset;
+@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void)
+
+   auto allocate_gpr_slot = [&](unsigned int regno)
+     {
+-      if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
+-        frame.hard_fp_save_and_probe = regno;
++      vec_safe_push (frame.saved_gprs, regno);
+       frame.reg_offset[regno] = offset;
+-      if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-        frame.wb_push_candidate1 = regno;
+-      else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+-        frame.wb_push_candidate2 = regno;
+       offset += UNITS_PER_WORD;
+     };
+
+@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
+-          if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
+-            frame.hard_fp_save_and_probe = regno;
++          vec_safe_push (frame.saved_fprs, regno);
+           /* If there is an alignment gap between integer and fp callee-saves,
+              allocate the last fp register to it if possible.  */
+           if (regno == last_fp_reg
+@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void)
+         }
+
+       frame.reg_offset[regno] = offset;
+-      if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-        frame.wb_push_candidate1 = regno;
+-      else if (frame.wb_push_candidate2 == INVALID_REGNUM
+-               && frame.wb_push_candidate1 >= V0_REGNUM)
+-        frame.wb_push_candidate2 = regno;
+       offset += vector_save_size;
+     }
+
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+-
+   auto saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
+-              || (frame.hard_fp_save_and_probe != INVALID_REGNUM
+-                  && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
+-                               frame.bytes_below_hard_fp)));
++
++  array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs)
++                                         ? frame.saved_gprs
++                                         : frame.saved_fprs);
++  if (!push_regs.empty ()
++      && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))
++    {
++      frame.hard_fp_save_and_probe = push_regs[0];
++      frame.wb_push_candidate1 = push_regs[0];
++      if (push_regs.size () > 1)
++        frame.wb_push_candidate2 = push_regs[1];
++    }
++  else
++    gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size));
+
+   /* With stack-clash, a register must be saved in non-leaf functions.
+      The saving of the bottommost register counts as an implicit probe,
+@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void)
+                         + frame.sve_callee_adjust
+                         + frame.final_adjust, frame.frame_size));
+
+-  if (!frame.emit_frame_chain && frame.callee_adjust == 0)
++  if (frame.callee_adjust == 0)
+     {
+-      /* We've decided not to associate any register saves with the initial
+-         stack allocation.  */
+-      frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM;
+-      frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM;
++      /* We've decided not to do a "real" push and pop.  However,
++         setting up the frame chain is treated as being essentially
++         a multi-instruction push.  */
++      frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM;
++      if (!frame.emit_frame_chain)
++        frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM;
+     }
+
+   frame.laid_out = true;
+@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno)
+   return known_ge (cfun->machine->frame.reg_offset[regno], 0);
+ }
+
+-/* Return the next register up from REGNO up to LIMIT for the callee
+-   to save.  */
+-
+-static unsigned
+-aarch64_next_callee_save (unsigned regno, unsigned limit)
+-{
+-  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
+-    regno ++;
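The offset-ordered vectors are what make register-number order irrelevant to pairing. A sketch of the idea under assumed, simplified types: any two adjacent entries whose slots are exactly UNITS_PER_WORD (8 bytes) apart can form an STP/LDP pair, so x30 can pair with x19.

#include <cstdint>
#include <utility>
#include <vector>

struct save { unsigned regno; int64_t offset; };

// SAVES is in offset order, as built by aarch64_layout_frame above.
// Returns index pairs that can be emitted as a single STP/LDP.
std::vector<std::pair<int, int>>
form_pairs (const std::vector<save> &saves)
{
  std::vector<std::pair<int, int>> pairs;
  for (size_t i = 0; i + 1 < saves.size (); )
    if (saves[i + 1].offset == saves[i].offset + 8)  // UNITS_PER_WORD
      {
        pairs.push_back ({int (i), int (i + 1)});
        i += 2;       // both registers consumed by one pair
      }
    else
      ++i;            // lone save; emitted as a single STR/LDR
  return pairs;
}

Scanning by offset rather than register number is exactly what lets the next patch move the LR slot to the front without losing pairs.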
View file
_service:tar_scm:0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
Added
@@ -0,0 +1,107 @@
+From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:46:33 +0100
+Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in
+ more cases
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666
+
+Now that the prologue and epilogue code iterates over saved
+registers in offset order, we can put the LR save slot first
+without compromising LDP/STP formation.
+
+This isn't worthwhile when shadow call stacks are enabled, since the
+first two registers are also push/pop candidates, and LR cannot be
+popped when shadow call stacks are enabled.  (LR is instead loaded
+first and compared against the shadow stack's value.)
+
+But otherwise, it seems better to put the LR save slot first,
+to reduce unnecessary variation with the layout for stack clash
+protection.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make
+	the position of the LR save slot dependent on stack clash
+	protection unless shadow call stacks are enabled.
+
+gcc/testsuite/
+	* gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                    | 2 +-
+ gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_2.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_4.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_7.c  | 4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index e10c9d763..1c127192d 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void)
+       allocate_gpr_slot (R29_REGNUM);
+       allocate_gpr_slot (R30_REGNUM);
+     }
+-  else if (flag_stack_clash_protection
++  else if ((flag_stack_clash_protection || !frame.is_scs_enabled)
+	   && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
+     /* Put the LR save slot first, since it makes a good choice of probe
+        for stack clash purposes.  The idea is that the link register usually
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+index c19505082..c54ab2d0c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+@@ -14,6 +14,6 @@
+ t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
+ t_frame_run (test10)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+index 7e5df84cf..0d715314c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19")
+ t_frame_run (test2)
+
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+index ed13487a0..b41229c42 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test4, 400, "x19")
+ t_frame_run (test4)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+index 964527949..5702656a5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test7, 700, "x19")
+ t_frame_run (test7)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */
+
+--
+2.33.0
+
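As a rough, hypothetical illustration of the layout change (invented function, not taken from the patch or testsuite), a function that keeps a value live across a call now saves LR ahead of x19:

/* Sketch only; compile with -O2 on aarch64.  */
extern int bar (void);

int
foo (void)
{
  int x = bar ();	/* x must survive the second call, so it needs x19 */
  return x + bar ();
}

/* Expected prologue before the patch:  stp x19, x30, [sp, -NN]!
   Expected prologue after the patch:   stp x30, x19, [sp, -NN]!  */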
View file
_service:tar_scm:0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
Added
@@ -0,0 +1,3270 @@
+From 88a41bc24eb793eee27aa9f4ef6b763b3c3e76e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:25 +0000
+Subject: [PATCH 106/157] [Backport][SME] aarch64: Switch PSTATE.SM around
+ calls
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd8090f40079fa41ee58d9f76b2e50ed4f95c6bf
+
+This patch adds support for switching to the appropriate SME mode
+for each call.  Switching to streaming mode requires an SMSTART SM
+instruction and switching to non-streaming mode requires an SMSTOP SM
+instruction.  If the call is being made from streaming-compatible code,
+these switches are conditional on the current mode being the opposite
+of the one that the call needs.
+
+Since changing PSTATE.SM changes the vector length and effectively
+changes the ISA, the code to do the switching has to be emitted late.
+The patch does this using a new pass that runs next to late prologue/
+epilogue insertion.  (It doesn't use md_reorg because later additions
+need the CFG.)
+
+If a streaming-compatible function needs to switch mode for a call,
+it must restore the original mode afterwards.  The old mode must
+therefore be available immediately after the call.  The easiest
+way of ensuring this is to force the use of a hard frame pointer
+and ensure that the old state is saved at an in-range offset
+from there.
+
+Changing modes clobbers the Z and P registers, so we need to
+save and restore live Z and P state around each mode switch.
+However, mode switches are not expected to be performance
+critical, so it seemed better to err on the side of being
+correct rather than trying to optimise the save and restore
+with surrounding code.
+
+gcc/
+	* config/aarch64/aarch64-passes.def
+	(pass_late_thread_prologue_and_epilogue): New pass.
+	* config/aarch64/aarch64-sme.md: New file.
+	* config/aarch64/aarch64.md: Include it.
+	(*tb<optab><mode>1): Rename to...
+	(@aarch64_tb<optab><mode>): ...this.
+	(call, call_value, sibcall, sibcall_value): Don't require operand 2
+	to be a CONST_INT.
+	* config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Return
+	the insn.
+	(make_pass_switch_sm_state): Declare.
+	* config/aarch64/aarch64.h (TARGET_STREAMING_COMPATIBLE): New macro.
+	(CALL_USED_REGISTER): Mark VG as call-preserved.
+	(aarch64_frame::old_svcr_offset): New member variable.
+	(machine_function::call_switches_sm_state): Likewise.
+	(CUMULATIVE_ARGS::num_sme_mode_switch_args): Likewise.
+	(CUMULATIVE_ARGS::sme_mode_switch_args): Likewise.
+	* config/aarch64/aarch64.cc: Include tree-pass.h and cfgbuild.h.
+	(aarch64_cfun_incoming_pstate_sm): New function.
+	(aarch64_call_switches_pstate_sm): Likewise.
+	(aarch64_reg_save_mode): Return DImode for VG_REGNUM.
+	(aarch64_callee_isa_mode): New function.
+	(aarch64_insn_callee_isa_mode): Likewise.
+	(aarch64_guard_switch_pstate_sm): Likewise.
+	(aarch64_switch_pstate_sm): Likewise.
+	(aarch64_sme_mode_switch_regs): New class.
+	(aarch64_record_sme_mode_switch_args): New function.
+	(aarch64_finish_sme_mode_switch_args): Likewise.
+	(aarch64_function_arg): Handle the end marker by returning a
+	PARALLEL that contains the ABI cookie that we used previously
+	alongside the result of aarch64_finish_sme_mode_switch_args.
+	(aarch64_init_cumulative_args): Initialize num_sme_mode_switch_args.
+	(aarch64_function_arg_advance): If a call would switch SM state,
+	record all argument registers that would need to be saved around
+	the mode switch.
+	(aarch64_need_old_pstate_sm): New function.
+ (aarch64_layout_frame): Decide whether the frame needs to store the + incoming value of PSTATE.SM and allocate a save slot for it if so. + If a function switches SME state, arrange to save the old value + of the DWARF VG register. Handle the case where this is the only + register save slot above the FP. + (aarch64_save_callee_saves): Handles saves of the DWARF VG register. + (aarch64_get_separate_components): Prevent such saves from being + shrink-wrapped. + (aarch64_old_svcr_mem): New function. + (aarch64_read_old_svcr): Likewise. + (aarch64_guard_switch_pstate_sm): Likewise. + (aarch64_expand_prologue): Handle saves of the DWARF VG register. + Initialize any SVCR save slot. + (aarch64_expand_call): Allow the cookie to be PARALLEL that contains + both the UNSPEC_CALLEE_ABI value and a list of registers that need + to be preserved across a change to PSTATE.SM. If the call does + involve such a change to PSTATE.SM, record the registers that + would be clobbered by this process. Also emit an instruction + to mark the temporary change in VG. Update call_switches_pstate_sm. + (aarch64_emit_call_insn): Return the emitted instruction. + (aarch64_frame_pointer_required): New function. + (aarch64_conditional_register_usage): Prevent VG_REGNUM from being + treated as a register operand. + (aarch64_switch_pstate_sm_for_call): New function. + (pass_data_switch_pstate_sm): New pass variable. + (pass_switch_pstate_sm): New pass class. + (make_pass_switch_pstate_sm): New function. + (TARGET_FRAME_POINTER_REQUIRED): Define. + * config/aarch64/t-aarch64 (s-check-sve-md): Add aarch64-sme.md. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_1.c: New test. + * gcc.target/aarch64/sme/call_sm_switch_2.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_4.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_6.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_7.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_9.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_10.c: Likewise. 
+---
+ gcc/config/aarch64/aarch64-passes.def         |   1 +
+ gcc/config/aarch64/aarch64-protos.h           |   3 +-
+ gcc/config/aarch64/aarch64-sme.md             | 171 ++++
+ gcc/config/aarch64/aarch64.cc                 | 883 +++++++++++++++++-
+ gcc/config/aarch64/aarch64.h                  |  25 +-
+ gcc/config/aarch64/aarch64.md                 |  13 +-
+ gcc/config/aarch64/t-aarch64                  |   5 +-
+ .../gcc.target/aarch64/sme/call_sm_switch_1.c | 233 +++++
+ .../aarch64/sme/call_sm_switch_10.c           |  37 +
+ .../gcc.target/aarch64/sme/call_sm_switch_2.c |  43 +
+ .../gcc.target/aarch64/sme/call_sm_switch_3.c | 166 ++++
+ .../gcc.target/aarch64/sme/call_sm_switch_4.c |  43 +
+ .../gcc.target/aarch64/sme/call_sm_switch_5.c | 318 +++++++
+ .../gcc.target/aarch64/sme/call_sm_switch_6.c |  45 +
+ .../gcc.target/aarch64/sme/call_sm_switch_7.c | 516 ++++++++++
+ .../gcc.target/aarch64/sme/call_sm_switch_8.c |  87 ++
+ .../gcc.target/aarch64/sme/call_sm_switch_9.c | 103 ++
+ 17 files changed, 2668 insertions(+), 24 deletions(-)
+ create mode 100644 gcc/config/aarch64/aarch64-sme.md
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c
+
+diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
+index a2babc112..c6cbbf2ef 100644
+--- a/gcc/config/aarch64/aarch64-passes.def
++++ b/gcc/config/aarch64/aarch64-passes.def
+@@ -20,6 +20,7 @@
+
+ INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
+ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
++INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
+ INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
+ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
+ INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 9b03410dc..737f47026 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -913,7 +913,7 @@ void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
+				   const_tree, unsigned, bool = false);
+ void aarch64_init_expanders (void);
+ void aarch64_init_simd_builtins (void);
+-void aarch64_emit_call_insn (rtx);
++rtx_call_insn *aarch64_emit_call_insn (rtx);
+ void aarch64_register_pragmas (void);
+ void aarch64_relayout_simd_types (void);
+ void aarch64_reset_previous_fndecl (void);
+@@ -1055,6 +1055,7 @@ rtl_opt_pass *make_pass_track_speculation (gcc::context *);
+ rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *);
+ rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
+ rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
++rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
+
+ poly_uint64 aarch64_regmode_natural_size (machine_mode);
+
+diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
+new file mode 100644
+index 000000000..52427b4f1
+--- /dev/null
++++ b/gcc/config/aarch64/aarch64-sme.md
+@@ -0,0 +1,171 @@
++;; Machine description for AArch64 SME.
++;; Copyright (C) 2023 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but
++;; WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++;; General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
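For readers unfamiliar with the ACLE keywords involved, a minimal sketch of the situation the new pass handles (illustrative code, not part of the patch; it assumes a compiler with SME support, e.g. -march=armv9-a+sme):

void ns_callee (void);                            /* non-streaming */
void s_callee (void) __arm_streaming;             /* streaming */
void sc_callee (void) __arm_streaming_compatible; /* either mode */

void
caller (void) __arm_streaming_compatible
{
  sc_callee ();	/* no mode switch needed */
  s_callee ();	/* SMSTART SM first, conditional on PSTATE.SM == 0 */
  ns_callee ();	/* SMSTOP SM first, conditional on PSTATE.SM == 1 */
}		/* in both cases the original mode is restored afterwards */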
View file
_service:tar_scm:0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
Added
@@ -0,0 +1,4324 @@
+From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA
+ attributes
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f
+
+SME has an array called ZA that can be enabled and disabled separately
+from streaming mode.  A status bit called PSTATE.ZA indicates whether
+ZA is currently enabled or not.
+
+In C and C++, the state of PSTATE.ZA is controlled using function
+attributes.  There are four attributes that can be attached to
+function types to indicate that the function shares ZA with its
+caller.  These are:
+
+- arm::in("za")
+- arm::out("za")
+- arm::inout("za")
+- arm::preserves("za")
+
+If a function's type has one of these shared-ZA attributes,
+PSTATE.ZA is specified to be 1 on entry to the function and on return
+from the function.  Otherwise, the caller and callee have separate
+ZA contexts; they do not use ZA to share data.
+
+Although normal non-shared-ZA functions have a separate ZA context
+from their callers, nested uses of ZA are expected to be rare.
+The ABI therefore defines a cooperative lazy saving scheme that
+allows saves and restore of ZA to be kept to a minimum.
+(Callers still have the option of doing a full save and restore
+if they prefer.)
+
+Functions that want to use ZA internally have an arm::new("za")
+attribute, which tells the compiler to enable PSTATE.ZA for
+the duration of the function body.  It also tells the compiler
+to commit any lazy save initiated by a caller.
+
+The patch uses various abstract hard registers to track dataflow
+relating to ZA.  See the comments in the patch for details.
+
+The lazy save scheme is intended to be transparent to most normal
+functions, so that they don't need to be recompiled for SME.
+This is reflected in the way that most normal functions ignore
+the new hard registers added in the patch.
+
+As with arm::streaming and arm::streaming_compatible, the attributes are
+also available as __arm_<attr>.  This has two advantages: it triggers an
+error on compilers that don't understand the attributes, and it eases
+use on C, where [[...]] attributes were only added in C23.
+
+gcc/
+	* config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode.
+	* config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p)
+	(aarch64_output_rdsvl, aarch64_optimize_mode_switching)
+	(aarch64_restore_za): Declare.
+	* config/aarch64/constraints.md (UsR): New constraint.
+	* config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM)
+	(SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM)
+	(ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants.
+	(LAST_FAKE_REGNUM): Likewise.
+	(UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs.
+	(arches): Add sme.
+	(arch_enabled): Handle it.
+	(*cb<optab><mode>1): Rename to...
+	(aarch64_cb<optab><mode>1): ...this.
+	(*movsi_aarch64): Add an alternative for RDSVL.
+	(*movdi_aarch64): Likewise.
+	(aarch64_save_nzcv, aarch64_restore_nzcv): New insns.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA)
+	(UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE)
+	(UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2)
+	(UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs.
+	(UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise.
+	(UNSPECV_ASM_UPDATE_ZA): New unspecv.
+ (aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za) + (aarch64_initial_zero_za, aarch64_setup_local_tpidr2) + (aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2) + (aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za) + (aarch64_start_private_za_call, aarch64_end_private_za_call) + (aarch64_commit_lazy_save): New patterns. + * config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros. + (FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers. + (CALL_USED_REGISTERS): Replace with... + (CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers. + (FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers. + (FAKE_REGS): New register class. + (REG_CLASS_NAMES): Update accordingly. + (REG_CLASS_CONTENTS): Likewise. + (machine_function::tpidr2_block): New member variable. + (machine_function::tpidr2_block_ptr): Likewise. + (machine_function::za_save_buffer): Likewise. + (machine_function::next_asm_update_za_id): Likewise. + (CUMULATIVE_ARGS::shared_za_flags): Likewise. + (aarch64_mode_entity, aarch64_local_sme_state): New enums. + (aarch64_tristate_mode): Likewise. + (OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define. + * config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN) + (AARCH64_STATE_OUT): New constants. + (aarch64_attribute_shared_state_flags): New function. + (aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state) + (aarch64_check_state_string, cmp_string_csts): Likewise. + (aarch64_merge_string_arguments, aarch64_check_arm_new_against_type) + (handle_arm_new, handle_arm_shared): Likewise. + (handle_arm_new_za_attribute): New + (aarch64_arm_attribute_table): Add new, preserves, in, out, and inout. + (aarch64_hard_regno_nregs): Handle FAKE_REGS. + (aarch64_hard_regno_mode_ok): Likewise. + (aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions. + (aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za. + (aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions. + (aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za. + (aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags) + (aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions. + (aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise. + (aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise. + (aarch64_expand_mov_immediate): Handle RDSVL immediates. + (aarch64_function_arg): Add the ZA sharing flags as a third limb + of the PARALLEL. + (aarch64_init_cumulative_args): Record the ZA sharing flags. + (aarch64_extra_live_on_entry): New function. Handle the new + ZA-related fake registers. + (aarch64_epilogue_uses): Handle the new ZA-related fake registers. + (aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants. + (aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions. + (aarch64_init_tpidr2_block, aarch64_restore_za): Likewise. + (aarch64_layout_frame): Check whether the current function creates + new ZA state. Record that it clobbers LR if so. + (aarch64_expand_prologue): Handle functions that create new ZA state. + (aarch64_expand_epilogue): Likewise. + (aarch64_create_tpidr2_block): New function. + (aarch64_restore_za): Likewise. + (aarch64_start_call_args): Disallow calls to shared-ZA functions + from functions that have no ZA state. Emit a marker instruction + before calls to private-ZA functions from functions that have + SME state. + (aarch64_expand_call): Add return registers for state that is + managed via attributes. 
Record the use and clobber information + for the ZA registers. + (aarch64_end_call_args): New function. + (aarch64_regno_regclass): Handle FAKE_REGS. + (aarch64_class_max_nregs): Likewise. + (aarch64_override_options_internal): Require TARGET_SME for + functions that have ZA state. + (aarch64_conditional_register_usage): Handle FAKE_REGS. + (aarch64_mov_operand_p): Handle RDSVL immediates. + (aarch64_comp_type_attributes): Check that the ZA sharing flags + are equal. + (aarch64_merge_decl_attributes): New function. + (aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer) + (aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise. + (aarch64_insn_references_sme_state_p): Likewise. + (aarch64_mode_needed_local_sme_state): Likewise. + (aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise. + (aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise. + (aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise. + (aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise. + (aarch64_mode_backprop, aarch64_mode_entry): Likewise. + (aarch64_mode_exit, aarch64_mode_eh_handler): Likewise. + (aarch64_mode_priority, aarch64_md_asm_adjust): Likewise. + (TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define. + (TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise. + (TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise. + (TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise. + (TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise. + (TARGET_EXTRA_LIVE_ON_ENTRY): Likewise. + (TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_new, __arm_preserves,__arm_in, __arm_out, and __arm_inout. + +gcc/testsuite/ + * gcc.target/aarch64/sme/za_state_1.c: New test. + * gcc.target/aarch64/sme/za_state_2.c: Likewise. + * gcc.target/aarch64/sme/za_state_3.c: Likewise. + * gcc.target/aarch64/sme/za_state_4.c: Likewise. + * gcc.target/aarch64/sme/za_state_5.c: Likewise. + * gcc.target/aarch64/sme/za_state_6.c: Likewise. + * g++.target/aarch64/sme/exceptions_1.C: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. +--- + gcc/config/aarch64/aarch64-c.cc | 32 + + gcc/config/aarch64/aarch64-isa-modes.def | 5 + + gcc/config/aarch64/aarch64-protos.h | 5 + + gcc/config/aarch64/aarch64-sme.md | 287 ++++ + gcc/config/aarch64/aarch64.cc | 1371 ++++++++++++++++- + gcc/config/aarch64/aarch64.h | 98 +- + gcc/config/aarch64/aarch64.md | 81 +- + gcc/config/aarch64/constraints.md | 6 + + .../g++.target/aarch64/sme/exceptions_1.C | 189 +++ + .../g++.target/aarch64/sme/keyword_macros_1.C | 5 + + .../gcc.target/aarch64/sme/keyword_macros_1.c | 5 + + .../gcc.target/aarch64/sme/za_state_1.c | 154 ++ + .../gcc.target/aarch64/sme/za_state_2.c | 73 + + .../gcc.target/aarch64/sme/za_state_3.c | 31 + + .../gcc.target/aarch64/sme/za_state_4.c | 585 +++++++ + .../gcc.target/aarch64/sme/za_state_5.c | 595 +++++++ + .../gcc.target/aarch64/sme/za_state_6.c | 23 +
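A short sketch of how the four sharing attributes and arm::new("za") combine in practice (illustrative only, using the __arm_* keyword spellings mentioned in the commit message above):

void producer (void) __arm_streaming __arm_out("za");	/* writes ZA for caller */
void consumer (void) __arm_streaming __arm_in("za");	/* reads ZA on entry */

__arm_new("za") void
private_za_user (void) __arm_streaming
{
  /* PSTATE.ZA is enabled here, and any lazy save that a caller
     started is committed first.  */
  producer ();
  consumer ();
}	/* ZA is private to this function and dead on return */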
View file
_service:tar_scm:0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
Added
@@ -0,0 +1,103 @@
+From 9866b4c1d85d88fd9e25ff3ac5224b69d4e0f0b2 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 108/157] [Backport][SME] aarch64: Add a register class for
+ w12-w15
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=084122adb5792a9c8e7f7876e2c1d59ba80c228b
+
+Some SME instructions use w12-w15 to index ZA.  This patch
+adds a register class for that range.
+
+gcc/
+	* config/aarch64/aarch64.h (W12_W15_REGNUM_P): New macro.
+	(W12_W15_REGS): New register class.
+	(REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add entries for it.
+	* config/aarch64/aarch64.cc (aarch64_regno_regclass)
+	(aarch64_class_max_nregs, aarch64_register_move_cost): Handle
+	W12_W15_REGS.
+---
+ gcc/config/aarch64/aarch64.cc | 12 +++++++-----
+ gcc/config/aarch64/aarch64.h  |  6 ++++++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a6e996c5b..112dfeabb 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -14213,6 +14213,9 @@ aarch64_label_mentioned_p (rtx x)
+ enum reg_class
+ aarch64_regno_regclass (unsigned regno)
+ {
++  if (W12_W15_REGNUM_P (regno))
++    return W12_W15_REGS;
++
+   if (STUB_REGNUM_P (regno))
+     return STUB_REGS;
+
+@@ -14577,6 +14580,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
+   unsigned int nregs, vec_flags;
+   switch (regclass)
+     {
++    case W12_W15_REGS:
+     case STUB_REGS:
+     case TAILCALL_ADDR_REGS:
+     case POINTER_REGS:
+@@ -16926,13 +16930,11 @@ aarch64_register_move_cost (machine_mode mode,
+   const struct cpu_regmove_cost *regmove_cost
+     = aarch64_tune_params.regmove_cost;
+
+-  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
+-  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
+-      || to == STUB_REGS)
++  /* Treat any subset of POINTER_REGS as though it were GENERAL_REGS.  */
++  if (reg_class_subset_p (to, POINTER_REGS))
+     to = GENERAL_REGS;
+
+-  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
+-      || from == STUB_REGS)
++  if (reg_class_subset_p (from, POINTER_REGS))
+     from = GENERAL_REGS;
+
+   /* Make RDFFR very expensive.  In particular, if we know that the FFR
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 89d30b9bf..8b21faf34 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -648,6 +648,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+   && (REGNO) != R17_REGNUM \
+   && (REGNO) != R30_REGNUM) \
+
++#define W12_W15_REGNUM_P(REGNO) \
++  IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM)
++
+ #define FP_REGNUM_P(REGNO)			\
+   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
+
+@@ -674,6 +677,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ enum reg_class
+ {
+   NO_REGS,
++  W12_W15_REGS,
+   TAILCALL_ADDR_REGS,
+   STUB_REGS,
+   GENERAL_REGS,
+@@ -698,6 +702,7 @@ enum reg_class
+ #define REG_CLASS_NAMES				\
+ {						\
+   "NO_REGS",					\
++  "W12_W15_REGS",				\
+   "TAILCALL_ADDR_REGS",				\
+   "STUB_REGS",					\
+   "GENERAL_REGS",				\
+@@ -719,6 +724,7 @@ enum reg_class
+ #define REG_CLASS_CONTENTS						\
+ {									\
+   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
++  { 0x0000f000, 0x00000000, 0x00000000 },	/* W12_W15_REGS */	\
+   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\
+   { 0x3ffcffff, 0x00000000, 0x00000000 },	/* STUB_REGS */		\
+   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
+--
+2.33.0
+
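The new REG_CLASS_CONTENTS entry is just a bitmask over hard register numbers 12-15; a quick self-contained sanity check of the constant (illustrative C, not part of the patch):

#include <assert.h>

int
main (void)
{
  /* One bit per GPR: w12-w15 occupy bits 12-15 of the first word.  */
  unsigned int w12_w15 = (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);
  assert (w12_w15 == 0x0000f000);
  return 0;
}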
View file
_service:tar_scm:0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
Added
@@ -0,0 +1,72 @@
+From 8310c0df319a86bc2f63b8d3198dd1c394827bac Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 109/157] [Backport][SME] aarch64: Add a VNx1TI mode
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80fc055cf00fee4b1f9f19f77c8880b12226e086
+
+Although TI isn't really a native SVE element mode, it's convenient
+for SME if we define VNx1TI anyway, so that it can be used to
+distinguish .Q ZA operations from others.  It's purely an RTL
+convenience and isn't (yet) a valid storage mode.
+
+gcc/
+	* config/aarch64/aarch64-modes.def: Add VNx1TI.
+---
+ gcc/config/aarch64/aarch64-modes.def | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
+index 8f399225a..8fa66fdb3 100644
+--- a/gcc/config/aarch64/aarch64-modes.def
++++ b/gcc/config/aarch64/aarch64-modes.def
+@@ -146,7 +146,7 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2)
+    for 8-bit, 16-bit, 32-bit and 64-bit elements respectively.  It isn't
+    strictly necessary to set the alignment here, since the default would
+    be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.  */
+-#define SVE_MODES(NVECS, VB, VH, VS, VD) \
++#define SVE_MODES(NVECS, VB, VH, VS, VD, VT) \
+   VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+   VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+ \
+   ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \
+   ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \
++  ADJUST_NUNITS (VT##TI, exact_div (aarch64_sve_vg * NVECS, 2)); \
+   ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \
+@@ -163,17 +164,23 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2)
+   ADJUST_ALIGNMENT (VH##HI, 16); \
+   ADJUST_ALIGNMENT (VS##SI, 16); \
+   ADJUST_ALIGNMENT (VD##DI, 16); \
++  ADJUST_ALIGNMENT (VT##TI, 16); \
+   ADJUST_ALIGNMENT (VH##BF, 16); \
+   ADJUST_ALIGNMENT (VH##HF, 16); \
+   ADJUST_ALIGNMENT (VS##SF, 16); \
+   ADJUST_ALIGNMENT (VD##DF, 16);
+
+-/* Give SVE vectors the names normally used for 256-bit vectors.
+-   The actual number depends on command-line flags.  */
+-SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+-SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+-SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+-SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
++/* Give SVE vectors names of the form VNxX, where X describes what is
++   stored in each 128-bit unit.  The actual size of the mode depends
++   on command-line flags.
++
++   VNx1TI isn't really a native SVE mode, but it can be useful in some
++   limited situations.  */
++VECTOR_MODE_WITH_PREFIX (VNx, INT, TI, 1, 1);
++SVE_MODES (1, VNx16, VNx8, VNx4, VNx2, VNx1)
++SVE_MODES (2, VNx32, VNx16, VNx8, VNx4, VNx2)
++SVE_MODES (3, VNx48, VNx24, VNx12, VNx6, VNx3)
++SVE_MODES (4, VNx64, VNx32, VNx16, VNx8, VNx4)
+
+ /* Partial SVE vectors:
+
+--
+2.33.0
+
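To see what the exact_div in ADJUST_NUNITS means in concrete terms, here is a small worked example (a sketch, assuming aarch64_sve_vg counts the 64-bit granules in one SVE vector, so vg = VL/64):

#include <stdio.h>

int
main (void)
{
  for (int vl_bits = 128; vl_bits <= 2048; vl_bits *= 2)
    {
      int vg = vl_bits / 64;	/* aarch64_sve_vg for this vector length */
      printf ("VL = %4d bits: VNx1TI holds exact_div (%d, 2) = %d TI unit(s)\n",
	      vl_bits, vg, vg / 2);
    }
  return 0;
}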
View file
_service:tar_scm:0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
Added
@@ -0,0 +1,118 @@
+From e3c0d3d98ab1f60900533f3f75c598f899f37c9f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 110/157] [Backport][SME] aarch64: Generalise
+ unspec_based_function_base
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ec23d5a29bc5d89cef60e2aba2fe4095ee12a8f
+
+Until now, SVE intrinsics that map directly to unspecs
+have always used type suffix 0 to distinguish between signed
+integers, unsigned integers, and floating-point values.
+SME adds functions that need to use type suffix 1 instead.
+This patch generalises the classes accordingly.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins-functions.h
+	(unspec_based_function_base): Allow type suffix 1 to determine
+	the mode of the operation.
+	(unspec_based_function): Update accordingly.
+	(unspec_based_fused_function): Likewise.
+	(unspec_based_fused_lane_function): Likewise.
+---
+ .../aarch64/aarch64-sve-builtins-functions.h  | 29 ++++++++++++-------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+index 94a6d1207..f5fa4030c 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+@@ -250,18 +250,21 @@ class unspec_based_function_base : public function_base
+ public:
+   CONSTEXPR unspec_based_function_base (int unspec_for_sint,
+					 int unspec_for_uint,
+-					 int unspec_for_fp)
++					 int unspec_for_fp,
++					 unsigned int suffix_index = 0)
+     : m_unspec_for_sint (unspec_for_sint),
+       m_unspec_for_uint (unspec_for_uint),
+-      m_unspec_for_fp (unspec_for_fp)
++      m_unspec_for_fp (unspec_for_fp),
++      m_suffix_index (suffix_index)
+   {}
+
+   /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
+   int
+   unspec_for (const function_instance &instance) const
+   {
+-    return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
+-	    : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
++    auto &suffix = instance.type_suffix (m_suffix_index);
++    return (!suffix.integer_p ? m_unspec_for_fp
++	    : suffix.unsigned_p ? m_unspec_for_uint
+	    : m_unspec_for_sint);
+   }
+
+@@ -270,6 +273,9 @@ public:
+   int m_unspec_for_sint;
+   int m_unspec_for_uint;
+   int m_unspec_for_fp;
++
++  /* Which type suffix is used to choose between the unspecs.  */
++  unsigned int m_suffix_index;
+ };
+
+ /* A function_base for functions that have an associated unspec code.
+@@ -336,7 +342,8 @@ public:
+   rtx
+   expand (function_expander &e) const OVERRIDE
+   {
+-    return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
++    return e.use_exact_insn (CODE (unspec_for (e),
++				   e.vector_mode (m_suffix_index)));
+   }
+ };
+
+@@ -395,16 +402,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+	/* Put the operands in the normal (fma ...) order, with the accumulator
+	   last.  This fits naturally since that's also the unprinted operand
+	   in the asm output.  */
+	e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
+-	icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_sve (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+@@ -430,16 +437,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+	/* Put the operands in the normal (fma ...) order, with the accumulator
+	   last.  This fits naturally since that's also the unprinted operand
+	   in the asm output.  */
+	e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
+-	icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_lane (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+--
+2.33.0
+
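A concrete case where the signed/unsigned/float choice hangs off type suffix 1 rather than suffix 0 is the SME outer-product intrinsics added later in this series; an illustrative (not authoritative) example:

#include <arm_sme.h>

/* _za32 is type suffix 0; _s8 / _u8 / _f32 is type suffix 1 and is
   what distinguishes the SMOPA, UMOPA and FMOPA unspecs.  */
void
mopa (svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm)
  __arm_streaming __arm_inout("za")
{
  svmopa_za32_s8_m (0, pn, pm, zn, zm);	/* signed -> SMOPA */
}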
View file
_service:tar_scm:0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
Added
@@ -0,0 +1,117 @@
+From 3d721b42c97baba562b77988cec0fec229217519 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 111/157] [Backport][SME] aarch64: Generalise _m rules for SVE
+ intrinsics
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8de9304d94d4ec42863a25c1cb1a1ba9a1e3e0fe
+
+In SVE there was a simple rule that unary merging (_m) intrinsics
+had a separate initial argument to specify the values of inactive
+lanes, whereas other merging functions took inactive lanes from
+the first operand to the operation.
+
+That rule began to break down in SVE2, and it continues to do
+so in SME.  This patch therefore adds a virtual function to
+specify whether the separate initial argument is present or not.
+The old rule is still the default.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h
+	(function_shape::has_merge_argument_p): New member function.
+	* config/aarch64/aarch64-sve-builtins.cc:
+	(function_resolver::check_gp_argument): Use it.
+	(function_expander::get_fallback_value): Likewise.
+	* config/aarch64/aarch64-sve-builtins-shapes.cc
+	(apply_predication): Likewise.
+	(unary_convert_narrowt_def::has_merge_argument_p): New function.
+---
+ gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 10 ++++++++--
+ gcc/config/aarch64/aarch64-sve-builtins.cc        |  4 ++--
+ gcc/config/aarch64/aarch64-sve-builtins.h         | 13 +++++++++++++
+ 3 files changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index 95e40d8f3..c536949ba 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -66,8 +66,8 @@ apply_predication (const function_instance &instance, tree return_type,
+	 the same type as the result.  For unary_convert_narrowt it also
+	 provides the "bottom" half of active elements, and is present
+	 for all types of predication.  */
+-      if ((argument_types.length () == 2 && instance.pred == PRED_m)
+-	  || instance.shape == shapes::unary_convert_narrowt)
++      auto nargs = argument_types.length () - 1;
++      if (instance.shape->has_merge_argument_p (instance, nargs))
+	argument_types.quick_insert (0, return_type);
+     }
+ }
+@@ -3271,6 +3271,12 @@ SHAPE (unary_convert)
+    predicate.  */
+ struct unary_convert_narrowt_def : public overloaded_base<1>
+ {
++  bool
++  has_merge_argument_p (const function_instance &, unsigned int) const override
++  {
++    return true;
++  }
++
+   void
+   build (function_builder &b, const function_group_info &group) const OVERRIDE
+   {
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 5f3a2baea..3441b4294 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -2287,7 +2287,7 @@ function_resolver::check_gp_argument (unsigned int nops,
+   if (pred != PRED_none)
+     {
+       /* Unary merge operations should use resolve_unary instead.  */
+-      gcc_assert (nops != 1 || pred != PRED_m);
++      gcc_assert (!shape->has_merge_argument_p (*this, nops));
+       nargs = nops + 1;
+       if (!check_num_arguments (nargs)
+	  || !require_vector_type (i, VECTOR_TYPE_svbool_t))
+@@ -2931,7 +2931,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
+
+   gcc_assert (pred == PRED_m || pred == PRED_x);
+   if (merge_argno == DEFAULT_MERGE_ARGNO)
+-    merge_argno = nops == 1 && pred == PRED_m ? 0 : 1;
++    merge_argno = shape->has_merge_argument_p (*this, nops) ? 0 : 1;
+
+   if (merge_argno == 0)
+     return args[argno++];
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 7132b6e77..f16ac3947 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -710,6 +710,9 @@ public:
+ class function_shape
+ {
+ public:
++  virtual bool has_merge_argument_p (const function_instance &,
++				     unsigned int) const;
++
+   virtual bool explicit_type_suffix_p (unsigned int) const = 0;
+
+   /* True if the group suffix is present in overloaded names.
+@@ -982,6 +985,16 @@ function_base::vectors_per_tuple (const function_instance &instance) const
+   return instance.group_suffix ().vectors_per_tuple;
+ }
+
++/* Return true if INSTANCE (which has NARGS arguments) has an initial
++   vector argument whose only purpose is to specify the values of
++   inactive lanes.  */
++inline bool
++function_shape::has_merge_argument_p (const function_instance &instance,
++				      unsigned int nargs) const
++{
++  return nargs == 1 && instance.pred == PRED_m;
++}
++
+ /* Return the mode of the result of a call.  */
+ inline machine_mode
+ function_expander::result_mode () const
+--
+2.33.0
+
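In intrinsic terms, the old rule that the new hook encodes as its default looks like this (illustrative SVE code, independent of the patch):

#include <arm_sve.h>

svint32_t
f (svbool_t pg, svint32_t inactive, svint32_t x, svint32_t y)
{
  /* Unary _m: an explicit first argument supplies the inactive lanes.  */
  svint32_t a = svabs_s32_m (inactive, pg, x);
  /* Binary _m: inactive lanes are taken from the first operand (x).  */
  svint32_t b = svadd_s32_m (pg, x, y);
  return svadd_s32_m (pg, a, b);
}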
View file
_service:tar_scm:0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
Added
@@ -0,0 +1,15955 @@
+From 6c651a11f8e68244c4c53ad7b29983f54a3bc737 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 112/157] [Backport][SME] aarch64: Add support for <arm_sme.h>
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4f6ab9537051e156d52bd8e9df40107ba6685895
+
+This adds support for the SME parts of arm_sme.h.
+
+gcc/
+	* doc/invoke.texi: Document +sme-i16i64 and +sme-f64f64.
+	* config.gcc (aarch64*-*-*): Add arm_sme.h to the list of headers
+	to install and aarch64-sve-builtins-sme.o to the list of objects
+	to build.
+	* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
+	or undefine TARGET_SME, TARGET_SME_I16I64 and TARGET_SME_F64F64.
+	(aarch64_pragma_aarch64): Handle arm_sme.h.
+	* config/aarch64/aarch64-option-extensions.def (sme-i16i64)
+	(sme-f64f64): New extensions.
+	* config/aarch64/aarch64-protos.h (aarch64_sme_vq_immediate)
+	(aarch64_addsvl_addspl_immediate_p, aarch64_output_addsvl_addspl)
+	(aarch64_output_sme_zero_za): Declare.
+	(aarch64_output_move_struct): Delete.
+	(aarch64_sme_ldr_vnum_offset): Declare.
+	(aarch64_sve::handle_arm_sme_h): Likewise.
+	* config/aarch64/aarch64.h (AARCH64_ISA_SM_ON): New macro.
+	(AARCH64_ISA_SME_I16I64, AARCH64_ISA_SME_F64F64): Likewise.
+	(TARGET_STREAMING, TARGET_STREAMING_SME): Likewise.
+	(TARGET_SME_I16I64, TARGET_SME_F64F64): Likewise.
+	* config/aarch64/aarch64.cc (aarch64_sve_rdvl_factor_p): Rename to...
+	(aarch64_sve_rdvl_addvl_factor_p): ...this.
+	(aarch64_sve_rdvl_immediate_p): Update accordingly.
+	(aarch64_rdsvl_immediate_p, aarch64_add_offset): Likewise.
+	(aarch64_sme_vq_immediate): Likewise.  Make public.
+	(aarch64_sve_addpl_factor_p): New function.
+	(aarch64_sve_addvl_addpl_immediate_p): Use
+	aarch64_sve_rdvl_addvl_factor_p and aarch64_sve_addpl_factor_p.
+	(aarch64_addsvl_addspl_immediate_p): New function.
+	(aarch64_output_addsvl_addspl): Likewise.
+	(aarch64_cannot_force_const_mem): Return true for RDSVL immediates.
+	(aarch64_classify_index): Handle .Q scaling for VNx1TImode.
+	(aarch64_classify_address): Likewise for vnum offsets.
+	(aarch64_output_sme_zero_za): New function.
+	(aarch64_sme_ldr_vnum_offset_p): Likewise.
+	* config/aarch64/predicates.md (aarch64_addsvl_addspl_immediate):
+	New predicate.
+	(aarch64_pluslong_operand): Include it for SME.
+	* config/aarch64/constraints.md (Ucj, Uav): New constraints.
+	* config/aarch64/iterators.md (VNx1TI_ONLY): New mode iterator.
+	(SME_ZA_I, SME_ZA_SDI, SME_ZA_SDF_I, SME_MOP_BHI): Likewise.
+	(SME_MOP_HSDF): Likewise.
+	(UNSPEC_SME_ADDHA, UNSPEC_SME_ADDVA, UNSPEC_SME_FMOPA)
+	(UNSPEC_SME_FMOPS, UNSPEC_SME_LD1_HOR, UNSPEC_SME_LD1_VER)
+	(UNSPEC_SME_READ_HOR, UNSPEC_SME_READ_VER, UNSPEC_SME_SMOPA)
+	(UNSPEC_SME_SMOPS, UNSPEC_SME_ST1_HOR, UNSPEC_SME_ST1_VER)
+	(UNSPEC_SME_SUMOPA, UNSPEC_SME_SUMOPS, UNSPEC_SME_UMOPA)
+	(UNSPEC_SME_UMOPS, UNSPEC_SME_USMOPA, UNSPEC_SME_USMOPS)
+	(UNSPEC_SME_WRITE_HOR, UNSPEC_SME_WRITE_VER): New unspecs.
+	(elem_bits): Handle x2 and x4 structure modes, plus VNx1TI.
+	(Vetype, Vesize, VPRED): Handle VNx1TI.
+	(b): New mode attribute.
+	(SME_LD1, SME_READ, SME_ST1, SME_WRITE, SME_BINARY_SDI, SME_INT_MOP)
+	(SME_FP_MOP): New int iterators.
+	(optab): Handle SME unspecs.
+	(hv): New int attribute.
+	* config/aarch64/aarch64.md (*add<mode>3_aarch64): Handle ADDSVL
+	and ADDSPL.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SME_LDR): New unspec.
+ (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_ldr0, @aarch64_sme_ldrn<mode>): New patterns. + (UNSPEC_SME_STR): New unspec. + (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_str0, @aarch64_sme_strn<mode>): New patterns. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (UNSPEC_SME_ZERO): New unspec. + (aarch64_sme_zero): New pattern. + (@aarch64_sme_<SME_BINARY_SDI:optab><mode>): Likewise. + (@aarch64_sme_<SME_INT_MOP:optab><mode>): Likewise. + (@aarch64_sme_<SME_FP_MOP:optab><mode>): Likewise. + * config/aarch64/aarch64-sve-builtins.def: Add ZA type suffixes. + Include aarch64-sve-builtins-sme.def. + (DEF_SME_ZA_FUNCTION): New macro. + * config/aarch64/aarch64-sve-builtins.h (CP_READ_ZA): New call + property. + (CP_WRITE_ZA): Likewise. + (PRED_za_m): New predication type. + (type_suffix_index): Handle DEF_SME_ZA_SUFFIX. + (type_suffix_info): Add vector_p and za_p fields. + (function_instance::num_za_tiles): New member function. + (function_builder::get_attributes): Add an aarch64_feature_flags + argument. + (function_expander::get_contiguous_base): Take a base argument + number, a vnum argument number, and an argument that indicates + whether the vnum parameter is a factor of the SME vector length + or the prevailing vector length. + (function_expander::add_integer_operand): Take a poly_int64. + (sve_switcher::sve_switcher): Take a base set of flags. + (sme_switcher): New class. + (scalar_types): Add a null entry for NUM_VECTOR_TYPES. + * config/aarch64/aarch64-sve-builtins.cc: Include + aarch64-sve-builtins-sme.h. + (pred_suffixes): Add an entry for PRED_za_m. + (type_suffixes): Initialize vector_p and za_p. Handle ZA suffixes. + (TYPES_all_za, TYPES_d_za, TYPES_za_bhsd_data, TYPES_za_all_data) + (TYPES_za_s_integer, TYPES_za_d_integer, TYPES_mop_base) + (TYPES_mop_base_signed, TYPES_mop_base_unsigned, TYPES_mop_i16i64) + (TYPES_mop_i16i64_signed, TYPES_mop_i16i64_unsigned, TYPES_za): New + type suffix macros. + (preds_m, preds_za_m): New predication lists. + (function_groups): Handle DEF_SME_ZA_FUNCTION. + (scalar_types): Add an entry for NUM_VECTOR_TYPES. + (find_type_suffix_for_scalar_type): Check positively for vectors + rather than negatively for predicates. + (check_required_extensions): Handle PSTATE.SM and PSTATE.ZA + requirements. + (report_out_of_range): Handle the case where the minimum and + maximum are the same. + (function_instance::reads_global_state_p): Return true for functions + that read ZA. + (function_instance::modifies_global_state_p): Return true for functions + that write to ZA. + (sve_switcher::sve_switcher): Add a base flags argument. + (function_builder::get_name): Handle "__arm_" prefixes. + (add_attribute): Add an overload that takes a namespaces. + (add_shared_state_attribute): New function. + (function_builder::get_attributes): Take the required feature flags + as argument. Add streaming and ZA attributes where appropriate. + (function_builder::add_unique_function): Update calls accordingly. + (function_resolver::check_gp_argument): Assert that the predication + isn't ZA _m predication. 
+ (function_checker::function_checker): Don't bias the argument + number for ZA _m predication. + (function_expander::get_contiguous_base): Add arguments that + specify the base argument number, the vnum argument number, + and an argument that indicates whether the vnum parameter is + a factor of the SME vector length or the prevailing vector length. + Handle the SME case. + (function_expander::add_input_operand): Handle pmode_register_operand. + (function_expander::add_integer_operand): Take a poly_int64. + (init_builtins): Call handle_arm_sme_h for LTO. + (handle_arm_sve_h): Skip SME intrinsics. + (handle_arm_sme_h): New function. + * config/aarch64/aarch64-sve-builtins-functions.h + (read_write_za, write_za): New classes. + (unspec_based_sme_function, za_arith_function): New using aliases. + (quiet_za_arith_function): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.h + (binary_za_int_m, binary_za_m, binary_za_uint_m, bool_inherent) + (inherent_za, inherent_mask_za, ldr_za, load_za, read_za_m, store_za) + (str_za, unary_za_m, write_za_m): Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): + Expect za_m functions to have an existing governing predicate. + (binary_za_m_base, binary_za_int_m_def, binary_za_m_def): New classes. + (binary_za_uint_m_def, bool_inherent_def, inherent_za_def): Likewise. + (inherent_mask_za_def, ldr_za_def, load_za_def, read_za_m_def) + (store_za_def, str_za_def, unary_za_m_def, write_za_m_def): Likewise. + * config/aarch64/arm_sme.h: New file. + * config/aarch64/aarch64-sve-builtins-sme.h: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.cc: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.def: Likewise. + * config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on + aarch64-sve-builtins-sme.def and aarch64-sve-builtins-sme.h. + (aarch64-sve-builtins-sme.o): New rule. + +gcc/testsuite/ + * lib/target-supports.exp: Add sme and sme-i16i64 features. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME* + macros. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Allow functions + to be marked as __arm_streaming, __arm_streaming_compatible, and + __arm_inout("za"). + * g++.target/aarch64/sve/acle/general-c++/func_redef_4.c: Mark the + function as __arm_streaming_compatible. + * g++.target/aarch64/sve/acle/general-c++/func_redef_5.c: Likewise. + * g++.target/aarch64/sve/acle/general-c++/func_redef_7.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_4.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_5.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme-acle-asm.exp: New test harness. + * gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c: New test. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c: Likewise. +--- + gcc/config.gcc | 4 +- + gcc/config/aarch64/aarch64-c.cc | 6 + + .../aarch64/aarch64-option-extensions.def | 4 + + gcc/config/aarch64/aarch64-protos.h | 8 +- + gcc/config/aarch64/aarch64-sme.md | 373 +++++++++++++++ + .../aarch64/aarch64-sve-builtins-functions.h | 64 +++ + .../aarch64/aarch64-sve-builtins-shapes.cc | 306 +++++++++++-
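A small sketch of the user-level surface this patch enables (illustrative only; it assumes a compiler configured with SME support, e.g. -march=armv9-a+sme):

#include <arm_sme.h>

__arm_new("za") void
accumulate (svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming
{
  svzero_za ();				/* ZERO { za } */
  svaddha_za32_s32_m (0, pn, pm, zn);	/* ADDHA into tile 0 */
}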
View file
_service:tar_scm:0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
Added
@@ -0,0 +1,1748 @@
+From 0ad41f11bea5c303ff39c54cae8e46afdfae6070 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:29 +0000
+Subject: [PATCH 113/157] [Backport][SME] aarch64: Add support for
+ __arm_locally_streaming
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3f6e5991fab507aa79121dc44d1afcd622c78744
+
+This patch adds support for the __arm_locally_streaming attribute,
+which allows a function to use SME internally without changing
+the function's ABI.  The attribute is valid but redundant for
+__arm_streaming functions.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_arm_attribute_table): Add
+	arm::locally_streaming.
+	(aarch64_fndecl_is_locally_streaming): New function.
+	(aarch64_fndecl_sm_state): Handle locally-streaming functions.
+	(aarch64_cfun_enables_pstate_sm): New function.
+	(aarch64_add_offset): Add an argument that specifies whether
+	the streaming vector length should be used instead of the
+	prevailing one.
+	(aarch64_split_add_offset, aarch64_add_sp, aarch64_sub_sp): Likewise.
+	(aarch64_allocate_and_probe_stack_space): Likewise.
+	(aarch64_expand_mov_immediate): Update calls accordingly.
+	(aarch64_need_old_pstate_sm): Return true for locally-streaming
+	streaming-compatible functions.
+	(aarch64_layout_frame): Force all call-preserved Z and P registers
+	to be saved and restored if the function switches PSTATE.SM in the
+	prologue.
+	(aarch64_get_separate_components): Disable shrink-wrapping of
+	such Z and P saves and restores.
+	(aarch64_use_late_prologue_epilogue): New function.
+	(aarch64_expand_prologue): Measure SVE lengths in the streaming
+	vector length for locally-streaming functions, then emit code
+	to enable streaming mode.
+	(aarch64_expand_epilogue): Likewise in reverse.
+	(TARGET_USE_LATE_PROLOGUE_EPILOGUE): Define.
+	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
+	Define __arm_locally_streaming.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/locally_streaming_1.c: New test.
+	* gcc.target/aarch64/sme/locally_streaming_2.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_3.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_4.c: Likewise.
+	* gcc.target/aarch64/sme/keyword_macros_1.c: Add
+	__arm_locally_streaming.
+	* g++.target/aarch64/sme/keyword_macros_1.C: Likewise.
+---
+ gcc/config/aarch64/aarch64-c.cc               |   1 +
+ gcc/config/aarch64/aarch64.cc                 | 233 +++++++--
+ .../g++.target/aarch64/sme/keyword_macros_1.C |   1 +
+ .../gcc.target/aarch64/sme/keyword_macros_1.c |   1 +
+ .../aarch64/sme/locally_streaming_1.c         | 466 ++++++++++++++++++
+ .../aarch64/sme/locally_streaming_2.c         | 177 +++++++
+ .../aarch64/sme/locally_streaming_3.c         | 273 ++++++++++
+ .../aarch64/sme/locally_streaming_4.c         | 145 ++++++
+ 8 files changed, 1259 insertions(+), 38 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index cb8a6c2fc..745719d8b 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -86,6 +86,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
+
+   DEFINE_ARM_KEYWORD_MACRO ("streaming");
+   DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
++  DEFINE_ARM_KEYWORD_MACRO ("locally_streaming");
+
+ #undef DEFINE_ARM_KEYWORD_MACRO
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 113784e31..4cb43c2e2 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -3283,6 +3283,7 @@ static const attribute_spec aarch64_arm_attributes[] =
+     NULL, attr_streaming_exclusions },
+   { "streaming_compatible", 0, 0, false, true, true, true,
+     NULL, attr_streaming_exclusions },
++  { "locally_streaming", 0, 0, true, false, false, false, NULL, NULL },
+   { "new", 1, -1, true, false, false, false,
+     handle_arm_new, NULL },
+   { "preserves", 1, -1, false, true, true, true,
+@@ -4657,6 +4658,16 @@ aarch64_fntype_isa_mode (const_tree fntype)
+	  | aarch64_fntype_pstate_za (fntype));
+ }
+
++/* Return true if FNDECL uses streaming mode internally, as an
++   implementation choice.  */
++
++static bool
++aarch64_fndecl_is_locally_streaming (const_tree fndecl)
++{
++  return lookup_attribute ("arm", "locally_streaming",
++			   DECL_ATTRIBUTES (fndecl));
++}
++
+ /* Return the state of PSTATE.SM when compiling the body of
+    function FNDECL.  This might be different from the state of
+    PSTATE.SM on entry.  */
+@@ -4664,6 +4675,9 @@ aarch64_fntype_isa_mode (const_tree fntype)
+ static aarch64_feature_flags
+ aarch64_fndecl_pstate_sm (const_tree fndecl)
+ {
++  if (aarch64_fndecl_is_locally_streaming (fndecl))
++    return AARCH64_FL_SM_ON;
++
+   return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
+ }
+
+@@ -4739,6 +4753,16 @@ aarch64_cfun_has_new_state (const char *state_name)
+   return aarch64_fndecl_has_new_state (cfun->decl, state_name);
+ }
+
++/* Return true if PSTATE.SM is 1 in the body of the current function,
++   but is not guaranteed to be 1 on entry.  */
++
++static bool
++aarch64_cfun_enables_pstate_sm ()
++{
++  return (aarch64_fndecl_is_locally_streaming (cfun->decl)
++	  && aarch64_cfun_incoming_pstate_sm () != AARCH64_FL_SM_ON);
++}
++
+ /* Return true if the current function has state STATE_NAME, either by
+    creating new state itself or by sharing state with callers.  */
+
+@@ -6931,6 +6955,10 @@ aarch64_add_offset_temporaries (rtx x)
+   TEMP2, if nonnull, is a second temporary register that doesn't
+   overlap either DEST or REG.
+
++   FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of OFFSET
++   is measured relative to the SME vector length instead of the current
++   prevailing vector length.  It is 0 otherwise.
++
+   Since this function may be used to adjust the stack pointer, we must
+   ensure that it cannot cause transient stack deallocation (for example
+   by first incrementing SP and then decrementing when adjusting by a
+@@ -6939,6 +6967,7 @@ aarch64_add_offset_temporaries (rtx x)
+ static void
+ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+		    poly_int64 offset, rtx temp1, rtx temp2,
++		    aarch64_feature_flags force_isa_mode,
+		    bool frame_related_p, bool emit_move_imm = true)
+ {
+   gcc_assert (emit_move_imm || temp1 != NULL_RTX);
+@@ -6951,9 +6980,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   /* Try using ADDVL or ADDPL to add the whole value.  */
+   if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
+     {
+-      rtx offset_rtx = gen_int_mode (offset, mode);
++      gcc_assert (offset.coeffs[0] == offset.coeffs[1]);
++      rtx offset_rtx;
++      if (force_isa_mode == 0)
++	offset_rtx = gen_int_mode (offset, mode);
++      else
++	offset_rtx = aarch64_sme_vq_immediate (mode, offset.coeffs[0], 0);
+       rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
+       RTX_FRAME_RELATED_P (insn) = frame_related_p;
++      if (frame_related_p && (force_isa_mode & AARCH64_FL_SM_ON))
++	add_reg_note (insn, REG_CFA_ADJUST_CFA,
++		      gen_rtx_SET (dest, plus_constant (Pmode, src,
++							offset)));
+       return;
+     }
+
+@@ -6969,11 +7007,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   if (src != const0_rtx
+       && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
+     {
+-      rtx offset_rtx = gen_int_mode (poly_offset, mode);
++      rtx offset_rtx;
++      if (force_isa_mode == 0)
++	offset_rtx = gen_int_mode (poly_offset, mode);
++      else
++	offset_rtx = aarch64_sme_vq_immediate (mode, factor, 0);
+       if (frame_related_p)
+	{
+	  rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
+	  RTX_FRAME_RELATED_P (insn) = true;
++	  if (force_isa_mode & AARCH64_FL_SM_ON)
++	    add_reg_note (insn, REG_CFA_ADJUST_CFA,
++			  gen_rtx_SET (dest, plus_constant (Pmode, src,
++							    poly_offset)));
+	  src = dest;
+	}
+       else
+@@ -7004,9 +7050,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   rtx val;
+   if (IN_RANGE (rel_factor, -32, 31))
+     {
++      if (force_isa_mode & AARCH64_FL_SM_ON)
++	{
++	  /* Try to use an unshifted RDSVL, otherwise fall back on
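For context, a minimal sketch of what the attribute means for users (illustrative, not from the patch):

/* Callers see a normal AAPCS64 function; only the body runs in
   streaming mode.  The prologue executes SMSTART SM and the epilogue
   SMSTOP SM, which is why the patch forces the call-preserved Z and P
   registers to be saved and restored around the switch.  */
__arm_locally_streaming void
fill (float *dst, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = 0.0f;
}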
_service:tar_scm:0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
Added
@@ -0,0 +1,708 @@ +From ef9c800309fa326ca56dd9d9affd7d5498624bb8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:29 +0000 +Subject: PATCH 114/157 BackportSME aarch64: Handle PSTATE.SM across + abnormal edges + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=275706fc59b5fdcc26d46d9f19951fc86b40c515 + +PSTATE.SM is always off on entry to an exception handler, and on entry +to a nonlocal goto receiver. Those entry points need to switch +PSTATE.SM back to the appropriate state for the current function. +In the case of streaming-compatible functions, they need to restore +the mode that the caller was originally using. + +The requirement on nonlocal goto receivers means that nonlocal +jumps need to ensure that PSTATE.SM is zero. + +gcc/ + * config/aarch64/aarch64.cc: Include except.h + (aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function. + (aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise. + (aarch64_need_old_pstate_sm): Return true if the function has + a nonlocal-goto or exception receiver. + (aarch64_switch_pstate_sm_for_landing_pad): New function. + (aarch64_switch_pstate_sm_for_jump): Likewise. + (pass_switch_pstate_sm::gate): Enable the pass for all + streaming and streaming-compatible functions. + (pass_switch_pstate_sm::execute): Handle non-local gotos and their + receivers. Handle exception handler entry points. + +gcc/testsuite/ + * g++.target/aarch64/sme/exceptions_2.C: New test. + * gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 141 ++++++++++++++++- + .../g++.target/aarch64/sme/exceptions_2.C | 148 ++++++++++++++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_1.c | 58 +++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_2.c | 44 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_3.c | 46 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_4.c | 25 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_5.c | 26 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_6.c | 31 ++++ + .../gcc.target/aarch64/sme/nonlocal_goto_7.c | 25 +++ + 9 files changed, 537 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4cb43c2e2..effb567c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -82,6 +82,7 @@ + #include "tree-dfa.h" + #include "asan.h" + #include "aarch64-feature-deps.h" ++#include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" + +@@ -7295,6 +7296,8 @@ public: + void add_reg (machine_mode, unsigned int); + void add_call_args (rtx_call_insn *); + void add_call_result (rtx_call_insn *); ++ void add_call_preserved_reg (unsigned int); ++ void add_call_preserved_regs (bitmap); + + void emit_prologue (); + void emit_epilogue (); +@@ -7427,6 +7430,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) + add_reg (GET_MODE (dest), REGNO (dest)); + } + ++/* REGNO is a register that is call-preserved under the current function's ABI. ++ Record that it must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno) ++{ ++ if (FP_REGNUM_P (regno)) ++ switch (crtl->abi->id ()) ++ { ++ case ARM_PCS_SVE: ++ add_reg (VNx16QImode, regno); ++ break; ++ case ARM_PCS_SIMD: ++ add_reg (V16QImode, regno); ++ break; ++ case ARM_PCS_AAPCS64: ++ add_reg (DImode, regno); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ else if (PR_REGNUM_P (regno)) ++ add_reg (VNx16BImode, regno); ++} ++ ++/* The hard registers in REGS are call-preserved under the current function's ++ ABI. Record that they must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs) ++{ ++ bitmap_iterator bi; ++ unsigned int regno; ++ EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi) ++ if (HARD_REGISTER_NUM_P (regno)) ++ add_call_preserved_reg (regno); ++ else ++ break; ++} ++ + /* Emit code to save registers before the mode switch. */ + + void +@@ -9825,6 +9868,23 @@ aarch64_need_old_pstate_sm () + if (aarch64_cfun_enables_pstate_sm ()) + return true; + ++ /* Non-local goto receivers are entered with PSTATE.SM equal to 0, ++ but the function needs to return with PSTATE.SM unchanged. */ ++ if (nonlocal_goto_handler_labels) ++ return true; ++ ++ /* Likewise for exception handlers. 
*/ ++ eh_landing_pad lp; ++ for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i) ++ if (lp && lp->post_landing_pad) ++ return true; ++ ++ /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call ++ streaming-compatible functions without SME being available, so PSTATE.SM ++ should only be changed if it is currently set to one. */ ++ if (crtl->has_nonlocal_goto) ++ return true; ++ + if (cfun->machine->call_switches_pstate_sm) + for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (auto *call = dyn_cast<rtx_call_insn *> (insn)) +@@ -30209,6 +30269,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + return seq; + } + ++/* BB is the target of an exception or nonlocal goto edge, which means ++ that PSTATE.SM is known to be 0 on entry. Put it into the state that ++ the current function requires. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_landing_pad (basic_block bb) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_sme_mode_switch_regs args_switch; ++ args_switch.add_call_preserved_regs (df_get_live_in (bb)); ++ args_switch.emit_prologue (); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON); ++ args_switch.emit_epilogue (); ++ if (guard_label) ++ emit_label (guard_label); ++ auto seq = get_insns (); ++ end_sequence (); ++ ++ emit_insn_after (seq, bb_note (bb)); ++ return true; ++} ++ ++/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry, ++ so arrange to make it so. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_jump (rtx_insn *jump) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF);
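A hedged sketch of the situation the patch handles, modelled on the new nonlocal_goto tests (GNU C nested functions; names are illustrative):

void run (void (*) (void));

int
outer (int *ptr) __arm_streaming
{
  __label__ failed;
  void inner (void) { *ptr += 1; goto failed; }  /* nonlocal goto */
  run (inner);
  return 1;
failed:
  /* This receiver is entered with PSTATE.SM == 0, so the pass above
     emits a switch back to streaming mode here (saving and restoring
     the live call-preserved FP/predicate registers around it) before
     the rest of outer's body runs.  */
  return 0;
}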
_service:tar_scm:0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
Added
@@ -0,0 +1,913 @@ +From c4578108ab766178fe7ebd51421c1ac9f317b675 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 115/157 BackportSME aarch64: Enforce inlining + restrictions for SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e9aa05df6c643610a3821af52eda642a525a886 + +A function that has local ZA state cannot be inlined into its caller, +since we only support managing ZA switches at function scope. + +A function whose body directly clobbers ZA state cannot be inlined into +a function with ZA state. + +A function whose body requires a particular PSTATE.SM setting can only +be inlined into a function body that guarantees that PSTATE.SM setting. +The callee's function type doesn't matter here: one locally-streaming +function can be inlined into another. + +gcc/ + * config/aarch64/aarch64.cc: Include symbol-summary.h, ipa-prop.h, + and ipa-fnsummary.h + (aarch64_function_attribute_inlinable_p): New function. + (AARCH64_IPA_SM_FIXED, AARCH64_IPA_CLOBBERS_ZA): New constants. + (aarch64_need_ipa_fn_target_info): New function. + (aarch64_update_ipa_fn_target_info): Likewise. + (aarch64_can_inline_p): Restrict the previous ISA flag checks + to non-modal features. Prevent callees that require a particular + PSTATE.SM state from being inlined into callers that can't guarantee + that state. Also prevent callees that have ZA state from being + inlined into callers that don't. Finally, prevent callees that + clobber ZA from being inlined into callers that have ZA state. + (TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P): Define. + (TARGET_NEED_IPA_FN_TARGET_INFO): Likewise. + (TARGET_UPDATE_IPA_FN_TARGET_INFO): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sme/inlining_1.c: New test. + * gcc.target/aarch64/sme/inlining_2.c: Likewise. + * gcc.target/aarch64/sme/inlining_3.c: Likewise. + * gcc.target/aarch64/sme/inlining_4.c: Likewise. + * gcc.target/aarch64/sme/inlining_5.c: Likewise. + * gcc.target/aarch64/sme/inlining_6.c: Likewise. + * gcc.target/aarch64/sme/inlining_7.c: Likewise. + * gcc.target/aarch64/sme/inlining_8.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 132 +++++++++++++++++- + .../gcc.target/aarch64/sme/inlining_1.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_10.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_11.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_12.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_13.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_14.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_15.c | 27 ++++ + .../gcc.target/aarch64/sme/inlining_2.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_3.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_4.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_5.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_6.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_7.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_8.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_9.c | 55 ++++++++ + 16 files changed, 696 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index effb567c2..eab94d5c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -85,6 +85,9 @@ + #include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -21351,6 +21354,17 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) + return ret; + } + ++/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out ++ rather than an opt-in list. */ ++ ++static bool ++aarch64_function_attribute_inlinable_p (const_tree fndecl) ++{ ++ /* A function that has local ZA state cannot be inlined into its caller, ++ since we only support managing ZA switches at function scope. */ ++ return !aarch64_fndecl_has_new_state (fndecl, "za"); ++} ++ + /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are + tri-bool options (yes, no, don't care) and the default value is + DEF, determine whether to reject inlining. */ +@@ -21372,6 +21386,60 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee, + return (callee == caller || callee == def); + } + ++/* Bit allocations for ipa_fn_summary::target_info. */ ++ ++/* Set if the function contains a stmt that relies on the function's ++ choice of PSTATE.SM setting (0 for non-streaming, 1 for streaming). ++ Not meaningful for streaming-compatible functions. 
*/ ++constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0; ++ ++/* Set if the function clobbers ZA. Not meaningful for functions that ++ have ZA state. */ ++constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1; ++ ++/* Implement TARGET_NEED_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_need_ipa_fn_target_info (const_tree, unsigned int &) ++{ ++ /* We could in principle skip this for streaming-compatible functions ++ that have ZA state, but that's a rare combination. */ ++ return true; ++} ++ ++/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) ++{ ++ if (auto *ga = dyn_cast<const gasm *> (stmt)) ++ { ++ /* We don't know what the asm does, so conservatively assume that ++ it requires the function's current SM mode. */ ++ info |= AARCH64_IPA_SM_FIXED; ++ for (unsigned int i = 0; i < gimple_asm_nclobbers (ga); ++i) ++ { ++ tree op = gimple_asm_clobber_op (ga, i); ++ const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op)); ++ if (strcmp (clobber, "za") == 0) ++ info |= AARCH64_IPA_CLOBBERS_ZA; ++ } ++ } ++ if (auto *call = dyn_cast<const gcall *> (stmt)) ++ { ++ if (gimple_call_builtin_p (call, BUILT_IN_MD)) ++ { ++ /* The attributes on AArch64 builtins are supposed to be accurate. ++ If the function isn't marked streaming-compatible then it ++ needs whichever SM mode it selects. */ ++ tree decl = gimple_call_fndecl (call); ++ if (aarch64_fndecl_pstate_sm (decl) != 0) ++ info |= AARCH64_IPA_SM_FIXED; ++ } ++ } ++ return true; ++} ++ + /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid + to inline CALLEE into CALLER based on target-specific info. + Make sure that the caller and callee have compatible architectural +@@ -21394,12 +21462,56 @@ aarch64_can_inline_p (tree caller, tree callee) + : target_option_default_node); + + /* Callee's ISA flags should be a subset of the caller's. */ +- if ((caller_opts->x_aarch64_asm_isa_flags +- & callee_opts->x_aarch64_asm_isa_flags) +- != callee_opts->x_aarch64_asm_isa_flags) ++ auto caller_asm_isa = (caller_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_asm_isa = (callee_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_asm_isa & ~caller_asm_isa) + return false; +- if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) +- != callee_opts->x_aarch64_isa_flags) ++ ++ auto caller_isa = (caller_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_isa = (callee_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_isa & ~caller_isa) ++ return false; ++ ++ /* Return true if the callee might have target_info property PROPERTY. ++ The answer must be true unless we have positive proof to the contrary. */ ++ auto callee_has_property = &(unsigned int property) ++ {
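A hedged sketch of the first restriction, modelled on the new inlining tests (__arm_new is the ACLE keyword spelling of the arm::new attribute):

__arm_new ("za") __attribute__ ((always_inline)) inline void
za_callee (void)
{
  /* Has local ZA state, so it must stay out of line: ZA switches
     are only managed at function scope.  */
}

void
za_caller (void)
{
  za_callee ();  /* GCC reports an error here: the always_inline
                    callee cannot be inlined into its caller.  */
}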
_service:tar_scm:0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
Added
@@ -0,0 +1,424 @@ +From 08b6cbe756ede25b16b8e9ff9ee32f76c4f8430f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 116/157 BackportSME aarch64: Update sibcall handling for + SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e7fee57c00ae17611651e0b057dc03b6e276b82 + +We only support tail calls between functions with the same PSTATE.ZA +setting ("private-ZA" to "private-ZA" and "shared-ZA" to "shared-ZA"). + +Only a normal non-streaming function can tail-call another non-streaming +function, and only a streaming function can tail-call another streaming +function. Any function can tail-call a streaming-compatible function. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall): + Enforce PSTATE.SM and PSTATE.ZA restrictions. + (aarch64_expand_epilogue): Save and restore the arguments + to a sibcall around any change to PSTATE.SM. + +gcc/testsuite/ + * gcc.target/aarch64/sme/sibcall_1.c: New test. + * gcc.target/aarch64/sme/sibcall_2.c: Likewise. + * gcc.target/aarch64/sme/sibcall_3.c: Likewise. + * gcc.target/aarch64/sme/sibcall_4.c: Likewise. + * gcc.target/aarch64/sme/sibcall_5.c: Likewise. + * gcc.target/aarch64/sme/sibcall_6.c: Likewise. + * gcc.target/aarch64/sme/sibcall_7.c: Likewise. + * gcc.target/aarch64/sme/sibcall_8.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 9 +++- + .../gcc.target/aarch64/sme/sibcall_1.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_2.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_3.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_4.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_5.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_6.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_7.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_8.c | 19 ++++++++ + 9 files changed, 304 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index eab94d5c2..b8e540b6e 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8660,6 +8660,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp) + if (crtl->abi->id () != expr_callee_abi (exp).id ()) + return false; + ++ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); ++ if (aarch64_fntype_pstate_sm (fntype) & ~aarch64_cfun_incoming_pstate_sm ()) ++ return false; ++ if (aarch64_fntype_pstate_za (fntype) != aarch64_cfun_incoming_pstate_za ()) ++ return false; + return true; + } + +@@ -11923,7 +11928,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, + aarch64_isa_flags); + aarch64_sme_mode_switch_regs return_switch; +- if (crtl->return_rtx && REG_P (crtl->return_rtx)) ++ if (sibcall) ++ return_switch.add_call_args (sibcall); ++ else if (crtl->return_rtx && REG_P (crtl->return_rtx)) + return_switch.add_reg (GET_MODE 
(crtl->return_rtx), + REGNO (crtl->return_rtx)); + return_switch.emit_prologue (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +new file mode 100644 +index 000000000..c7530de5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++sc_to_sc () arm::streaming_compatible ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++sc_to_s () arm::streaming_compatible ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++void ++sc_to_n () arm::streaming_compatible ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++sc_to_sc_ls () arm::streaming_compatible ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++sc_to_n_ls () arm::streaming_compatible ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +new file mode 100644 +index 000000000..8d1c8a9f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++s_to_sc () arm::streaming ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++s_to_s () arm::streaming ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tb\ts_callee} } } */ ++ ++void ++s_to_n () arm::streaming ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++s_to_sc_ls () arm::streaming ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++s_to_n_ls () arm::streaming ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +new file mode 100644 +index 000000000..2ae937fc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++n_to_sc () ++{
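The streaming-mode half of the rule is what the tests above exercise; a hedged sketch of the PSTATE.ZA half (ACLE __arm_inout keyword; illustrative names):

void private_callee (void);

void
shared_za_caller (void) __arm_inout ("za")
{
  private_callee ();  /* The caller must set up the ZA lazy save
                         before this call and commit or restore it
                         afterwards, so the call keeps its "bl" and
                         is never converted into a "b" tail call.  */
}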
_service:tar_scm:0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
Added
@@ -0,0 +1,117 @@ +From e0da78a258a34c26488b7ae623f9ae8727c2b264 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 14 Nov 2022 17:14:18 +0000 +Subject: PATCH 117/157 BackportSME libgcc: aarch64: Configure check for + .variant_pcs support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3ebb591c65b4fbe0cddd23ebc0ca2d9f7aef2cec + +Ideally SME support routines in libgcc are marked as variant PCS symbols +so check if as supports the directive. + +libgcc/ChangeLog: + + * config.in: Undef HAVE_AS_VARIANT_PCS. + * configure: Regenerate. + * configure.ac: Check for .variant_pcs. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 39 +++++++++++++++++++++++++++++++++++++++ + libgcc/configure.ac | 17 +++++++++++++++++ + 3 files changed, 59 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index f93c64a00..5dd96cdf6 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -13,6 +13,9 @@ + /* Define to 1 if the assembler supports LSE. */ + #undef HAVE_AS_LSE + ++/* Define to 1 if the assembler supports .variant_pcs. */ ++#undef HAVE_AS_VARIANT_PCS ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index 1f9b2ac57..afe02b303 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5619,6 +5619,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h + ;; + esac + ++ ++ ++case "${target}" in ++aarch64*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports .variant_pcs" >&5 ++$as_echo_n "checking if as supports .variant_pcs... " >&6; } ++if ${libgcc_cv_as_variant_pcs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ ++ cat confdefs.h - <<_ACEOF >conftest.$ac_ext ++/* end confdefs.h. */ ++ ++int ++main () ++{ ++asm (".variant_pcs foobar"); ++ ; ++ return 0; ++} ++_ACEOF ++if ac_fn_c_try_compile "$LINENO"; then : ++ libgcc_cv_as_variant_pcs=yes ++else ++ libgcc_cv_as_variant_pcs=no ++fi ++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ++ ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5 ++$as_echo "$libgcc_cv_as_variant_pcs" >&6; } ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ ++$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h ++ ++ fi ++ ;; ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index 2fc9d5d7c..abc398c91 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -648,6 +648,23 @@ changequote(,)dnl + esac) + LIBGCC_CHECK_AS_LSE + ++dnl Check if as supports .variant_pcs. ++AC_DEFUN(LIBGCC_CHECK_AS_VARIANT_PCS, ++case "${target}" in ++aarch64*-*-*) ++ AC_CACHE_CHECK(if as supports .variant_pcs, libgcc_cv_as_variant_pcs, ++ AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, ++ asm (".variant_pcs foobar");), ++ libgcc_cv_as_variant_pcs=yes, libgcc_cv_as_variant_pcs=no) ++ ) ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ AC_DEFINE(HAVE_AS_VARIANT_PCS, 1, ++ Define to 1 if the assembler supports .variant_pcs.) ++ fi ++ ;; ++esac) ++LIBGCC_CHECK_AS_VARIANT_PCS ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
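In sketch form, what the new define unlocks for the SME assembly added later in this series (the exact macro in aarch64-asm.h may be spelled differently; this only illustrates the guard):

#include "auto-target.h"   /* provides HAVE_AS_VARIANT_PCS */

#if HAVE_AS_VARIANT_PCS
/* Mark an SME helper as following a variant PCS, so linkers do not
   assume the base AAPCS64 conventions for calls to it.  */
asm (".variant_pcs __arm_sme_state");
#endif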
_service:tar_scm:0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
Added
@@ -0,0 +1,117 @@ +From 66d4035958e1dee2d16f9290004921674eb492b3 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 4 Dec 2023 10:52:52 +0000 +Subject: PATCH 118/157 BackportSME libgcc: aarch64: Configure check for + __getauxval + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 + +Add configure check for the __getauxval ABI symbol, which is always +available on aarch64 glibc, and may be available on other linux C +runtimes. For now only enabled on glibc, others have to override it + + target_configargs=libgcc_cv_have___getauxval=yes + +This is deliberately obscure as it should be auto detected, ideally +via a feature test macro in unistd.h (link time detection is not +possible since the libc may not be installed at libgcc build time), +but currently there is no such feature test mechanism. + +Without __getauxval, libgcc cannot do runtime CPU feature detection +and has to assume only the build time known features are available. + +libgcc/ChangeLog: + + * config.in: Undef HAVE___GETAUXVAL. + * configure: Regenerate. + * configure.ac: Check for __getauxval. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 26 ++++++++++++++++++++++++++ + libgcc/configure.ac | 19 +++++++++++++++++++ + 3 files changed, 48 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index 5dd96cdf6..441d4d39b 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -16,6 +16,9 @@ + /* Define to 1 if the assembler supports .variant_pcs. */ + #undef HAVE_AS_VARIANT_PCS + ++/* Define to 1 if __getauxval is available. */ ++#undef HAVE___GETAUXVAL ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index afe02b303..a874ef57e 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5658,6 +5658,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h + ;; + esac + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5 ++$as_echo_n "checking for __getauxval... " >&6; } ++if ${libgcc_cv_have___getauxval+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" >&5 ++$as_echo "$libgcc_cv_have___getauxval" >&6; } ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ ++$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h ++ ++ fi ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index abc398c91..64b45ae14 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -665,6 +665,25 @@ aarch64*-*-*) + esac) + LIBGCC_CHECK_AS_VARIANT_PCS + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. 
++ AC_CACHE_CHECK(for __getauxval, ++ libgcc_cv_have___getauxval, ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac) ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ AC_DEFINE(HAVE___GETAUXVAL, 1, ++ Define to 1 if __getauxval is available.) ++ fi ++esac ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
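What the define unlocks, in sketch form (the constants are written out because <sys/auxv.h> cannot be assumed present when libgcc is built; the SME patch that follows uses this same idiom):

unsigned long int __getauxval (unsigned long int);

#define AT_HWCAP2 26
#define HWCAP2_SME (1 << 23)

static _Bool
sme_accessible (void)
{
  /* Ask the C runtime for the kernel's AT_HWCAP2 bits and test the
     SME feature bit.  */
  return (__getauxval (AT_HWCAP2) & HWCAP2_SME) != 0;
}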
_service:tar_scm:0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
Added
@@ -0,0 +1,627 @@ +From 1e111ac2d71c5469dc526559de009542acaeb16f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 15 Nov 2022 14:08:55 +0000 +Subject: PATCH 119/157 BackportSME libgcc: aarch64: Add SME runtime + support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=328c17af772207cb03740809c05ba2c3abfb86be + +The call ABI for SME (Scalable Matrix Extension) requires a number of +helper routines which are added to libgcc so they are tied to the +compiler version instead of the libc version. See +https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines + +The routines are in shared libgcc and static libgcc eh, even though +they are not related to exception handling. This is to avoid linking +a copy of the routines into dynamic linked binaries, because TPIDR2_EL0 +block can be extended in the future which is better to handle in a +single place per process. + +The support routines have to decide if SME is accessible or not. Linux +tells userspace if SME is accessible via AT_HWCAP2, otherwise a new +__aarch64_sme_accessible symbol was introduced that a libc can define. +Due to libgcc and libc build order, the symbol availability cannot be +checked so for __aarch64_sme_accessible an unistd.h feature test macro +is used while such detection mechanism is not available for __getauxval +so we rely on configure checks based on the target triplet. + +Asm helper code is added to make writing the routines easier. + +libgcc/ChangeLog: + + * config/aarch64/t-aarch64: Add sources to the build. + * config/aarch64/__aarch64_have_sme.c: New file. + * config/aarch64/__arm_sme_state.S: New file. + * config/aarch64/__arm_tpidr2_restore.S: New file. + * config/aarch64/__arm_tpidr2_save.S: New file. + * config/aarch64/__arm_za_disable.S: New file. + * config/aarch64/aarch64-asm.h: New file. + * config/aarch64/libgcc-sme.ver: New file. +--- + libgcc/config/aarch64/__aarch64_have_sme.c | 75 ++++++++++++++ + libgcc/config/aarch64/__arm_sme_state.S | 55 ++++++++++ + libgcc/config/aarch64/__arm_tpidr2_restore.S | 89 ++++++++++++++++ + libgcc/config/aarch64/__arm_tpidr2_save.S | 101 +++++++++++++++++++ + libgcc/config/aarch64/__arm_za_disable.S | 65 ++++++++++++ + libgcc/config/aarch64/aarch64-asm.h | 98 ++++++++++++++++++ + libgcc/config/aarch64/libgcc-sme.ver | 24 +++++ + libgcc/config/aarch64/t-aarch64 | 10 ++ + 8 files changed, 517 insertions(+) + create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c + create mode 100644 libgcc/config/aarch64/__arm_sme_state.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S + create mode 100644 libgcc/config/aarch64/__arm_za_disable.S + create mode 100644 libgcc/config/aarch64/aarch64-asm.h + create mode 100644 libgcc/config/aarch64/libgcc-sme.ver + +diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c b/libgcc/config/aarch64/__aarch64_have_sme.c +new file mode 100644 +index 000000000..5e6492462 +--- /dev/null ++++ b/libgcc/config/aarch64/__aarch64_have_sme.c +@@ -0,0 +1,75 @@ ++/* Initializer for SME support. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. 
++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "auto-target.h" ++ ++#ifndef inhibit_libc ++/* For libc feature test macros. */ ++# include <unistd.h> ++#endif ++ ++#if __ARM_FEATURE_SME ++/* Avoid runtime SME detection if libgcc is built with SME. */ ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 1 ++#elif HAVE___GETAUXVAL ++/* SME access detection on Linux. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR sme_accessible () ++ ++# define AT_HWCAP2 26 ++# define HWCAP2_SME (1 << 23) ++unsigned long int __getauxval (unsigned long int); ++ ++static _Bool ++sme_accessible (void) ++{ ++ unsigned long hwcap2 = __getauxval (AT_HWCAP2); ++ return (hwcap2 & HWCAP2_SME) != 0; ++} ++#elif __LIBC___AARCH64_SME_ACCESSIBLE ++/* Alternative SME access detection. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR __aarch64_sme_accessible () ++_Bool __aarch64_sme_accessible (void); ++#else ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 0 ++#endif ++ ++/* Define the symbol gating SME support in libgcc. */ ++HAVE_SME_CONST _Bool __aarch64_have_sme ++ __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE; ++ ++#ifdef HAVE_SME_CTOR ++/* Use a higher priority to ensure it runs before user constructors ++ with priority 100. */ ++static void __attribute__((constructor (90))) ++init_have_sme (void) ++{ ++ __aarch64_have_sme = HAVE_SME_CTOR; ++} ++#endif +diff --git a/libgcc/config/aarch64/__arm_sme_state.S b/libgcc/config/aarch64/__arm_sme_state.S +new file mode 100644 +index 000000000..c4e16cac0 +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_sme_state.S +@@ -0,0 +1,55 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Query SME state. Call ABI: ++ - Private ZA, streaming-compatible. ++ - x2-x15, x19-x29, sp and fp regs are call preserved. ++ - Takes no argument. 
++ - Returns SME state in x0 and TPIDR2_EL0 in x1. */ ++ ++.hidden __aarch64_have_sme ++ ++variant_pcs (__arm_sme_state) ++ ++ENTRY (__arm_sme_state) ++ /* Check if SME is available. */ ++ adrp x1, __aarch64_have_sme ++ ldrb w1, x1, :lo12:__aarch64_have_sme ++ cbz w1, L(nosme) ++ ++ /* Expose the bottom 2 bits of svcr (SM, ZA) in x0 and set the ++ top 2 bits indicating that SME and TPIDR2_EL0 are available. */ ++ .inst 0xd53b4240 /* mrs x0, svcr */ ++ .inst 0xd53bd0a1 /* mrs x1, tpidr2_el0 */ ++ and x0, x0, 3 ++ orr x0, x0, 0xc000000000000000 ++ ret ++ ++L(nosme): ++ mov x0, 0 ++ mov x1, 0 ++ ret ++END (__arm_sme_state)
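A hedged sketch of the libc-side alternative mentioned above: a C library that knows whether SME is accessible can define the fallback symbol (and advertise it through a feature-test macro in <unistd.h>):

/* Supplied by the libc, not by libgcc; the constant result below is
   a placeholder only.  */
_Bool
__aarch64_sme_accessible (void)
{
  /* Return nonzero only when the kernel lets this process use SME
     (streaming mode and the ZA storage).  */
  return 0;
}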
_service:tar_scm:0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
Added
@@ -0,0 +1,70 @@ +From 310c8b5aaedad1430146fed9d8992201278164a6 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 29 Sep 2023 13:55:51 +0100 +Subject: PATCH 120/157 BackportSME libgcc: aarch64: Add SME unwinder + support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=91d68665b8b7a5dffd0bbf8cd1f74c3c41d4c2d8 + +To support the ZA lazy save scheme, the PCS requires the unwinder to +reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry +to an exception handler. We use the __arm_za_disable SME runtime call +unconditionally to achieve this. +https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions + +The hidden alias is used to avoid a PLT and avoid inconsistent VPCS +marking (we don't rely on special PCS at the call site). In case of +static linking the SME runtime init code is linked in code that raises +exceptions. + +libgcc/ChangeLog: + + * config/aarch64/__arm_za_disable.S: Add hidden alias. + * config/aarch64/aarch64-unwind.h: Reset the SME state before + EH return via the _Unwind_Frames_Extra hook. +--- + libgcc/config/aarch64/__arm_za_disable.S | 5 +++++ + libgcc/config/aarch64/aarch64-unwind.h | 16 ++++++++++++++++ + 2 files changed, 21 insertions(+) + +diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S +index cff5b9cec..03fc28a39 100644 +--- a/libgcc/config/aarch64/__arm_za_disable.S ++++ b/libgcc/config/aarch64/__arm_za_disable.S +@@ -63,3 +63,8 @@ ENTRY (__arm_za_disable) + L(end): + ret + END (__arm_za_disable) ++ ++/* Hidden alias used by the unwinder. */ ++.global __libgcc_arm_za_disable ++.hidden __libgcc_arm_za_disable ++.set __libgcc_arm_za_disable, __arm_za_disable +diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h +index 40b22d3c2..bfa695dcb 100644 +--- a/libgcc/config/aarch64/aarch64-unwind.h ++++ b/libgcc/config/aarch64/aarch64-unwind.h +@@ -87,4 +87,20 @@ aarch64_frob_update_context (struct _Unwind_Context *context, + return; + } + ++/* SME runtime function local to libgcc, streaming compatible ++ and preserves more registers than the base PCS requires, but ++ we don't rely on that here. */ ++__attribute__ ((visibility ("hidden"))) ++void __libgcc_arm_za_disable (void); ++ ++/* Disable the SME ZA state in case an unwound frame used the ZA ++ lazy saving scheme. */ ++#undef _Unwind_Frames_Extra ++#define _Unwind_Frames_Extra(x) \ ++ do \ ++ { \ ++ __libgcc_arm_za_disable (); \ ++ } \ ++ while (0) ++ + #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */ +-- +2.33.0 +
_service:tar_scm:0220-Backport-SME-libgcc-Fix-config.in.patch
Added
@@ -0,0 +1,51 @@ +From b20b75158d1230a8b6cbabb36e3b128cbd9ec86f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 8 Dec 2023 12:22:54 +0000 +Subject: PATCH 121/157 BackportSME libgcc: Fix config.in + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47575ec9edcd3078f066aa54ba428420be796bef + +It was updated incorrectly in + + commit dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 + Author: Szabolcs Nagy <szabolcs.nagy@arm.com> + CommitDate: 2023-12-08 11:29:06 +0000 + + libgcc: aarch64: Configure check for __getauxval + +so regenerate it. + +libgcc/ChangeLog: + + * config.in: Regenerate. +--- + libgcc/config.in | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/libgcc/config.in b/libgcc/config.in +index 441d4d39b..8f7dd437b 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -16,9 +16,6 @@ + /* Define to 1 if the assembler supports .variant_pcs. */ + #undef HAVE_AS_VARIANT_PCS + +-/* Define to 1 if __getauxval is available. */ +-#undef HAVE___GETAUXVAL +- + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +@@ -67,6 +64,9 @@ + /* Define to 1 if you have the <unistd.h> header file. */ + #undef HAVE_UNISTD_H + ++/* Define to 1 if __getauxval is available. */ ++#undef HAVE___GETAUXVAL ++ + /* Define to the address where bug reports for this package should be sent. */ + #undef PACKAGE_BUGREPORT + +-- +2.33.0 +
_service:tar_scm:0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
Added
@@ -0,0 +1,54 @@ +From 0214ca06a182481851ed90aae21f460f87d26084 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 10 Dec 2023 19:46:05 +0000 +Subject: PATCH 122/157 BackportSME aarch64: Add -funwind-tables to some + tests + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=02ecdaab7a50f4505fd905effb6d238d773dc813 + +The .cfi scans in these tests failed for *-elf targets because +those targets don't enable .eh_frame info by default. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables. + * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. +--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c | 2 +- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c | 2 +- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +index a2de55773..98922aaea 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + void ns_callee (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +index ed999d085..4250fe798 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + __attribute__((aarch64_vector_pcs)) void ns_callee (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +index be9b5cc04..e3d9bc274 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + #include <arm_sve.h> +-- +2.33.0 +
_service:tar_scm:0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
Added
@@ -0,0 +1,106 @@ +From cc2e901eccd40992432f74270a9ebc1b708b6eb1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 10 Dec 2023 19:46:05 +0000 +Subject: PATCH 123/157 BackportSME aarch64: Skip some SME register save + tests on BE + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=23ea0bc2cf042d74c4adfe26a57cf96b1d837a91 + +Big-endian targets need to save Z8-Z15 in the same order as +the registers would appear for D8-D15, because the layout is +mandated by the EH ABI. BE targets therefore use ST1D instead +of the normal STR for those registers (but not for others). + +That difference is already tested elsewhere and isn't important +for the SME tests. This patch therefore restricts the affected +tests to LE. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that + contain Z8-Z23 saves to little-endian. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. +--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +index e3d9bc274..6238ab80d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -14,7 +14,7 @@ struct callbacks { + }; + + /* +-** n_caller: { target lp64 } ++** n_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -114,7 +114,7 @@ n_caller (struct callbacks *c) + } + + /* +-** s_caller: { target lp64 } ++** s_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -214,7 +214,7 @@ s_caller (struct callbacks *c) arm::streaming + } + + /* +-** sc_caller: ++** sc_caller: { target aarch64_little_endian } + ** stp x29, x30, \sp, #?-32\! + ** mov x29, sp + ** cntd x16 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +index f44724df3..c909b34ff 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +@@ -7,7 +7,7 @@ svint8_t produce_z0 (); + void consume_z0 (svint8_t); + + /* +-** test_z0: ++** test_z0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z0 +@@ -32,7 +32,7 @@ svint8x4_t produce_z3 (); + void consume_z3 (svint8x4_t); + + /* +-** test_z3: ++** test_z3: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z3 +@@ -61,7 +61,7 @@ svbool_t produce_p0 (); + void consume_p0 (svbool_t); + + /* +-** test_p0: ++** test_p0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_p0 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +index 20ff4b87d..4bb637f47 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +@@ -265,7 +265,7 @@ n_ls_vector_pcs () + } + + /* +-** n_ls_sve_pcs: ++** n_ls_sve_pcs: { target aarch64_little_endian } + ** sub sp, sp, #?16 + ** cntd x16 + ** str x16, \sp\ +-- +2.33.0 +
_service:tar_scm:0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
Added
@@ -0,0 +1,37 @@ +From ab7a2c3b74c65d62d661621c56ef984cfb72f985 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:50 +0100 +Subject: PATCH 124/157 BackportSME Add OPTIONS_H_EXTRA to GTFILES + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1e1fa054970a30844eb94d726b4954dcb8b9063 + +I have a patch that adds a typedef to aarch64's <cpu>-opts.h. +The typedef is used for a TargetVariable in the .opt file, +which means that it is covered by PCH and so needs to be +visible to gengtype. + +<cpu>-opts.h is not included directly in tm.h, but indirectly +by target headers (in this case aarch64.h). There was therefore +nothing that caused it to be added to GTFILES. + +gcc/ + * Makefile.in (GTFILES): Add OPTIONS_H_EXTRA. +--- + gcc/Makefile.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 5cd838270..fcfa54697 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -2648,6 +2648,7 @@ s-match: build/genmatch$(build_exeext) $(srcdir)/match.pd cfn-operators.pd + + GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ + $(host_xm_file_list) \ ++ $(OPTIONS_H_EXTRA) \ + $(tm_file_list) $(HASHTAB_H) $(SPLAY_TREE_H) $(srcdir)/bitmap.h \ + $(srcdir)/wide-int.h $(srcdir)/alias.h \ + $(srcdir)/coverage.cc $(srcdir)/rtl.h \ +-- +2.33.0 +
_service:tar_scm:0224-Backport-SME-aarch64-Add-V1DI-mode.patch
Added
@@ -0,0 +1,177 @@ +From 21f9190106f8324be42e3e8e0510467386dd68a0 Mon Sep 17 00:00:00 2001 +From: Andrew Carlotti <andrew.carlotti@arm.com> +Date: Fri, 15 Jul 2022 15:25:53 +0100 +Subject: PATCH 125/157 BackportSME aarch64: Add V1DI mode + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ba864c5d11a1c20891a1e054cb7814ec23de5c9 + +We already have a V1DF mode, so this makes the vector modes more consistent. + +Additionally, this allows us to recognise uint64x1_t and int64x1_t types given +only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type). + +gcc/ChangeLog: + + * config/aarch64/aarch64-builtins.cc + (v1di_UP): Add V1DI mode to _UP macros. + * config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode. + * config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode. + * config/aarch64/aarch64-simd.md + (vec_extractv2dfv1df): Replace with... + (vec_extract<mode><V1half>): ...this. + * config/aarch64/aarch64.cc + (aarch64_classify_vector_mode): Add V1DI mode. + * config/aarch64/iterators.md + (VQ_2E, V1HALF, V1half): New. + (nunits): Add V1DI mode. +--- + gcc/config/aarch64/aarch64-builtins.cc | 1 + + gcc/config/aarch64/aarch64-modes.def | 1 + + gcc/config/aarch64/aarch64-simd-builtin-types.def | 6 +++--- + gcc/config/aarch64/aarch64-simd.md | 14 +++++++------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/iterators.md | 14 ++++++++++++-- + 6 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 015e9d975..37bb3af48 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -55,6 +55,7 @@ + #define v2si_UP E_V2SImode + #define v2sf_UP E_V2SFmode + #define v1df_UP E_V1DFmode ++#define v1di_UP E_V1DImode + #define di_UP E_DImode + #define df_UP E_DFmode + #define v16qi_UP E_V16QImode +diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def +index 8fa66fdb3..dd74da4b3 100644 +--- a/gcc/config/aarch64/aarch64-modes.def ++++ b/gcc/config/aarch64/aarch64-modes.def +@@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ + VECTOR_MODES (FLOAT, 8); /* V2SF. */ + VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ ++VECTOR_MODE (INT, DI, 1); /* V1DI. */ + VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ + VECTOR_MODE (FLOAT, HF, 2); /* V2HF. 
*/ + +diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def +index 248e51e96..405455814 100644 +--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def ++++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def +@@ -24,7 +24,7 @@ + ENTRY (Int16x8_t, V8HI, none, 11) + ENTRY (Int32x2_t, V2SI, none, 11) + ENTRY (Int32x4_t, V4SI, none, 11) +- ENTRY (Int64x1_t, DI, none, 11) ++ ENTRY (Int64x1_t, V1DI, none, 11) + ENTRY (Int64x2_t, V2DI, none, 11) + ENTRY (Uint8x8_t, V8QI, unsigned, 11) + ENTRY (Uint8x16_t, V16QI, unsigned, 12) +@@ -32,7 +32,7 @@ + ENTRY (Uint16x8_t, V8HI, unsigned, 12) + ENTRY (Uint32x2_t, V2SI, unsigned, 12) + ENTRY (Uint32x4_t, V4SI, unsigned, 12) +- ENTRY (Uint64x1_t, DI, unsigned, 12) ++ ENTRY (Uint64x1_t, V1DI, unsigned, 12) + ENTRY (Uint64x2_t, V2DI, unsigned, 12) + ENTRY (Poly8_t, QI, poly, 9) + ENTRY (Poly16_t, HI, poly, 10) +@@ -42,7 +42,7 @@ + ENTRY (Poly8x16_t, V16QI, poly, 12) + ENTRY (Poly16x4_t, V4HI, poly, 12) + ENTRY (Poly16x8_t, V8HI, poly, 12) +- ENTRY (Poly64x1_t, DI, poly, 12) ++ ENTRY (Poly64x1_t, V1DI, poly, 12) + ENTRY (Poly64x2_t, V2DI, poly, 12) + ENTRY (Float16x4_t, V4HF, none, 13) + ENTRY (Float16x8_t, V8HF, none, 13) +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 62493cdfa..04592fc90 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -8326,16 +8326,16 @@ + }) + + ;; Extract a single-element 64-bit vector from one half of a 128-bit vector. +-(define_expand "vec_extractv2dfv1df" +- (match_operand:V1DF 0 "register_operand") +- (match_operand:V2DF 1 "register_operand") ++(define_expand "vec_extract<mode><V1half>" ++ (match_operand:<V1HALF> 0 "register_operand") ++ (match_operand:VQ_2E 1 "register_operand") + (match_operand 2 "immediate_operand") + "TARGET_SIMD" + { +- /* V1DF is rarely used by other patterns, so it should be better to hide +- it in a subreg destination of a normal DF op. */ +- rtx scalar0 = gen_lowpart (DFmode, operands0); +- emit_insn (gen_vec_extractv2dfdf (scalar0, operands1, operands2)); ++ /* V1DI and V1DF are rarely used by other patterns, so it should be better ++ to hide it in a subreg destination of a normal DI or DF op. */ ++ rtx scalar0 = gen_lowpart (<VHALF>mode, operands0); ++ emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands1, operands2)); + DONE; + }) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b8e540b6e..f7285555b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -4117,7 +4117,7 @@ aarch64_classify_vector_mode (machine_mode mode) + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: +- /* ...E_V1DImode doesn't exist. */ ++ case E_V1DImode: + case E_V4HFmode: + case E_V4BFmode: + case E_V2SFmode: +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 152d28f6b..94db8c53f 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -138,6 +138,9 @@ + ;; VQ without 2 element modes. + (define_mode_iterator VQ_NO2E V16QI V8HI V4SI V8HF V4SF V8BF) + ++;; 2 element quad vector modes. ++(define_mode_iterator VQ_2E V2DI V2DF) ++ + ;; BFmode vector modes. 
+ (define_mode_iterator VBF V4BF V8BF) + +@@ -1116,12 +1119,13 @@ + (define_mode_attr nunits (V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") +- (V2DI "2") (V8DI "8") ++ (V1DI "1") (V2DI "2") + (V4HF "4") (V8HF "8") + (V4BF "4") (V8BF "8") + (V2SF "2") (V4SF "4") + (V1DF "1") (V2DF "2") +- (DI "1") (DF "1")) ++ (DI "1") (DF "1") ++ (V8DI "8")) + + ;; Map a mode to the number of bits in it, if the size of the mode + ;; is constant. +@@ -1501,6 +1505,12 @@ + (V2DI "di") (V2SF "sf") + (V4SF "v2sf") (V2DF "df")) + ++;; Single-element half modes of quad vector modes. ++(define_mode_attr V1HALF (V2DI "V1DI") (V2DF "V1DF")) ++ ++;; Single-element half modes of quad vector modes, in lower-case ++(define_mode_attr V1half (V2DI "v1di") (V2DF "v1df")) ++ + ;; Double modes of vector modes. + (define_mode_attr VDBL (V8QI "V16QI") (V4HI "V8HI") + (V4HF "V8HF") (V4BF "V8BF") +-- +2.33.0 +
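At the source level the new mode corresponds to the single-lane 64-bit vector types; a minimal sketch using a standard arm_neon.h intrinsic:

#include <arm_neon.h>

int64x1_t
low_half (int64x2_t v)
{
  /* int64x1_t is now a V1DImode value, so single-lane extracts like
     this one are served by the generalised
     vec_extract<mode><V1half> pattern above.  */
  return vget_low_s64 (v);
}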
_service:tar_scm:0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
Added
@@ -0,0 +1,217 @@ +From eaea26e2218ee61a9be0e2933548c752167dcdb5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Fri, 10 Nov 2023 15:46:21 +0000 +Subject: PATCH 126/157 BackportSME Allow md iterators to include other + iterators + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5dbaf4851bbf56b6176dca1f1e7d38a16b5b84ee + +This patch allows an .md iterator to include the contents of +previous iterators, possibly with an extra condition attached. + +Too much indirection might become hard to follow, so for the +AArch64 changes I tried to stick to things that seemed likely +to be uncontroversial: + +(a) structure iterators that combine modes for different sizes + and vector counts + +(b) iterators that explicitly duplicate another iterator + (for iterating over the cross product) + +gcc/ + * read-rtl.cc (md_reader::read_mapping): Allow iterators to + include other iterators. + * doc/md.texi: Document the change. + * config/aarch64/iterators.md (DREG2, VQ2, TX2, DX2, SX2): Include + the iterator that is being duplicated, rather than reproducing it. + (VSTRUCT_D): Redefine using VSTRUCT_234D. + (VSTRUCT_Q): Likewise VSTRUCT_234Q. + (VSTRUCT_2QD, VSTRUCT_3QD, VSTRUCT_4QD, VSTRUCT_QD): Redefine using + the individual D and Q iterators. +--- + gcc/config/aarch64/iterators.md | 58 ++++++++------------------------- + gcc/doc/md.texi | 13 ++++++++ + gcc/read-rtl.cc | 21 ++++++++++-- + 3 files changed, 46 insertions(+), 46 deletions(-) + +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 94db8c53f..a1659dfba 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -106,7 +106,7 @@ + (define_mode_iterator DREG V8QI V4HI V4HF V2SI V2SF DF) + + ;; Copy of the above. +-(define_mode_iterator DREG2 V8QI V4HI V4HF V2SI V2SF DF) ++(define_mode_iterator DREG2 DREG) + + ;; All modes suitable to store/load pair (2 elements) using STP/LDP. + (define_mode_iterator VP_2E V2SI V2SF V2DI V2DF) +@@ -121,7 +121,7 @@ + (define_mode_iterator VQ V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF) + + ;; Copy of the above. +-(define_mode_iterator VQ2 V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) ++(define_mode_iterator VQ2 VQ) + + ;; Quad vector modes suitable for moving. Includes BFmode. + (define_mode_iterator VQMOV V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) +@@ -321,14 +321,6 @@ + ;; Advanced SIMD opaque structure modes. + (define_mode_iterator VSTRUCT OI CI XI) + +-;; Advanced SIMD 64-bit vector structure modes. +-(define_mode_iterator VSTRUCT_D V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF) +- + ;; Advanced SIMD 64-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2D V2x8QI V2x4HI V2x2SI V2x1DI + V2x4HF V2x2SF V2x1DF V2x4BF) +@@ -341,6 +333,9 @@ + (define_mode_iterator VSTRUCT_4D V4x8QI V4x4HI V4x2SI V4x1DI + V4x4HF V4x2SF V4x1DF V4x4BF) + ++;; Advanced SIMD 64-bit vector structure modes. ++(define_mode_iterator VSTRUCT_D VSTRUCT_2D VSTRUCT_3D VSTRUCT_4D) ++ + ;; Advanced SIMD 64-bit 2-vector structure modes minus V2x1DI and V2x1DF. + (define_mode_iterator VSTRUCT_2DNX V2x8QI V2x4HI V2x2SI V2x4HF + V2x2SF V2x4BF) +@@ -365,14 +360,6 @@ + ;; Advanced SIMD 64-bit 4-vector structure modes with 64-bit elements. + (define_mode_iterator VSTRUCT_4DX V4x1DI V4x1DF) + +-;; Advanced SIMD 128-bit vector structure modes. 
+-(define_mode_iterator VSTRUCT_Q V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) +- + ;; Advanced SIMD 128-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2Q V2x16QI V2x8HI V2x4SI V2x2DI + V2x8HF V2x4SF V2x2DF V2x8BF) +@@ -385,49 +372,32 @@ + (define_mode_iterator VSTRUCT_4Q V4x16QI V4x8HI V4x4SI V4x2DI + V4x8HF V4x4SF V4x2DF V4x8BF) + ++;; Advanced SIMD 128-bit vector structure modes. ++(define_mode_iterator VSTRUCT_Q VSTRUCT_2Q VSTRUCT_3Q VSTRUCT_4Q) ++ + ;; Advanced SIMD 2-vector structure modes. +-(define_mode_iterator VSTRUCT_2QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF) ++(define_mode_iterator VSTRUCT_2QD VSTRUCT_2D VSTRUCT_2Q) + + ;; Advanced SIMD 3-vector structure modes. +-(define_mode_iterator VSTRUCT_3QD V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF) ++(define_mode_iterator VSTRUCT_3QD VSTRUCT_3D VSTRUCT_3Q) + + ;; Advanced SIMD 4-vector structure modes. +-(define_mode_iterator VSTRUCT_4QD V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_4QD VSTRUCT_4D VSTRUCT_4Q) + + ;; Advanced SIMD vector structure modes. +-(define_mode_iterator VSTRUCT_QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_QD VSTRUCT_D VSTRUCT_Q) + + ;; Double scalar modes + (define_mode_iterator DX DI DF DD) + + ;; Duplicate of the above +-(define_mode_iterator DX2 DI DF DD) ++(define_mode_iterator DX2 DX) + + ;; Single scalar modes + (define_mode_iterator SX SI SF) + + ;; Duplicate of the above +-(define_mode_iterator SX2 SI SF) ++(define_mode_iterator SX2 SX) + + ;; Single and double integer and float modes + (define_mode_iterator DSX DF DI SF SI) +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 04ace8f7f..c0cf0ec64 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -11561,6 +11561,19 @@ to @code{@var{mode}}. For example: + means that the @code{:DI} expansion only applies if @code{TARGET_64BIT} + but that the @code{:SI} expansion has no such constraint. + ++It is also possible to include iterators in other iterators. For example: ++ ++@smallexample ++(define_mode_iterator VI V16QI V8HI V4SI V2DI) ++(define_mode_iterator VF V8HF V4SF (V2DF "TARGET_DOUBLE")) ++(define_mode_iterator V VI (VF "TARGET_FLOAT")) ++@end smallexample ++ ++makes @samp{:V} iterate over the modes in @code{VI} and the modes ++in @code{VF}. When a construct uses @samp{:V}, the @code{V8HF} and ++@code{V4SF} expansions require @samp{TARGET_FLOAT} while the @code{V2DF} ++expansion requires @samp{TARGET_DOUBLE && TARGET_FLOAT}. ++ + Iterators are applied in the order they are defined. This can be + significant if two iterators are used in a construct that requires + substitutions. @xref{Substitutions}. 
+diff --git a/gcc/read-rtl.cc b/gcc/read-rtl.cc +index 798d24859..cdfa9e7b8 100644 +--- a/gcc/read-rtl.cc ++++ b/gcc/read-rtl.cc +@@ -1261,8 +1261,25 @@ md_reader::read_mapping (struct iterator_group *group, htab_t table) + string = read_string (false); + require_char_ws (')'); + } +- number = group->find_builtin (name.string); +- end_ptr = add_map_value (end_ptr, number, string); ++ auto *subm = (struct mapping *) htab_find (group->iterators, ++ &name.string); ++ if (subm) ++ { ++ if (m == subm) ++ fatal_with_file_and_line ("recursive definition of `%s'", ++ name.string);
View file
_service:tar_scm:0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
Added
@@ -0,0 +1,142 @@ +From 637e6469f2225b6f6f6b0c84b4e7abcd8dfd7ca4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christoph=20M=C3=BCllner?= <christoph.muellner@vrull.eu> +Date: Wed, 28 Sep 2022 11:19:06 +0200 +Subject: [PATCH 127/157] [Backport][SME] riscv: Add support for strlen inline + expansion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df48285b2484eb4f8e0570c566677114eb0e553a + +Note: Only introduce the definitions of function +emit_likely_jump_insn and emit_unlikely_jump_insn, +and drop others. + +This patch implements the expansion of the strlen builtin for RV32/RV64 +for xlen-aligned aligned strings if Zbb or XTheadBb instructions are available. +The inserted sequences are: + +rv32gc_zbb (RV64 is similar): + add a3,a0,4 + li a4,-1 +.L1: lw a5,0(a0) + add a0,a0,4 + orc.b a5,a5 + beq a5,a4,.L1 + not a5,a5 + ctz a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a3 + +rv64gc_xtheadbb (RV32 is similar): + add a4,a0,8 +.L2: ld a5,0(a0) + add a0,a0,8 + th.tstnbz a5,a5 + beqz a5,.L2 + th.rev a5,a5 + th.ff1 a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a4 + +This allows to inline calls to strlen(), with optimized code for +xlen-aligned strings, resulting in the following benefits over +a call to libc: +* no call/ret instructions +* no stack frame allocation +* no register saving/restoring +* no alignment test + +The inlining mechanism is gated by a new switch ('-minline-strlen') +and by the variable 'optimize_size'. + +Tested using the glibc string tests. + +Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu> + +gcc/ChangeLog: + + * config.gcc: Add new object riscv-string.o. + riscv-string.cc. + * config/riscv/riscv-protos.h (riscv_expand_strlen): + New function. + * config/riscv/riscv.md (strlen<mode>): New expand INSN. + * config/riscv/riscv.opt: New flag 'minline-strlen'. + * config/riscv/t-riscv: Add new object riscv-string.o. + * config/riscv/thead.md (th_rev<mode>2): Export INSN name. + (th_rev<mode>2): Likewise. + (th_tstnbz<mode>2): New INSN. + * doc/invoke.texi: Document '-minline-strlen'. + * emit-rtl.cc (emit_likely_jump_insn): New helper function. + (emit_unlikely_jump_insn): Likewise. + * rtl.h (emit_likely_jump_insn): New prototype. + (emit_unlikely_jump_insn): Likewise. + * config/riscv/riscv-string.cc: New file. + +gcc/testsuite/ChangeLog: + + * gcc.target/riscv/xtheadbb-strlen-unaligned.c: New test. + * gcc.target/riscv/xtheadbb-strlen.c: New test. + * gcc.target/riscv/zbb-strlen-disabled-2.c: New test. + * gcc.target/riscv/zbb-strlen-disabled.c: New test. + * gcc.target/riscv/zbb-strlen-unaligned.c: New test. + * gcc.target/riscv/zbb-strlen.c: New test. +--- + gcc/emit-rtl.cc | 24 ++++++++++++++++++++++++ + gcc/rtl.h | 2 ++ + 2 files changed, 26 insertions(+) + +diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc +index 1e02ae254..2df5ff414 100644 +--- a/gcc/emit-rtl.cc ++++ b/gcc/emit-rtl.cc +@@ -5163,6 +5163,30 @@ emit_jump_insn (rtx x) + return last; + } + ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very likely probability, ++ and add it to the end of the doubly-linked list. 
*/ ++ ++rtx_insn * ++emit_likely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_likely ()); ++ return jump; ++} ++ ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very unlikely probability, ++ and add it to the end of the doubly-linked list. */ ++ ++rtx_insn * ++emit_unlikely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); ++ return jump; ++} ++ + /* Make an insn of code CALL_INSN with pattern X + and add it to the end of the doubly-linked list. */ + +diff --git a/gcc/rtl.h b/gcc/rtl.h +index 488016bb4..a0db225cb 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *); + extern rtx_insn *emit_insn (rtx); + extern rtx_insn *emit_debug_insn (rtx); + extern rtx_insn *emit_jump_insn (rtx); ++extern rtx_insn *emit_likely_jump_insn (rtx); ++extern rtx_insn *emit_unlikely_jump_insn (rtx); + extern rtx_insn *emit_call_insn (rtx); + extern rtx_code_label *emit_label (rtx); + extern rtx_jump_table_data *emit_jump_table_data (rtx); +-- +2.33.0 +
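The two helpers are thin wrappers around emit_jump_insn that attach a REG_BR_PROB note. A minimal sketch of how a backend expander might use them (not from the patch; the function, condition and label here are hypothetical):

/* Emit the loop-back branch of a search loop; it is taken on most
   iterations, so record it as very likely for later passes.  */
static void
emit_loop_back_branch (rtx cond, rtx_code_label *label)
{
  rtx ref = gen_rtx_LABEL_REF (Pmode, label);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, ref, pc_rtx);
  /* Same as emit_jump_insn, plus a REG_BR_PROB note carrying
     profile_probability::very_likely ().  */
  emit_likely_jump_insn (gen_rtx_SET (pc_rtx, ite));
}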
View file
_service:tar_scm:0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
Added
@@ -0,0 +1,189 @@ +From 8c6ffb4c6f86231eee318ceeb8546a53037edfe9 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Tue, 4 Oct 2022 23:13:15 +0200 +Subject: [PATCH 128/157] [Backport][SME] attribs: Add overloads with namespace + name + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0764dc8537a4f87089ecd32391cb5f8803b43c96 + +I've discovered a problem with the way we handle scoped attributes. For +declaration or type attributes for attributes we don't know anything about +we just don't add them to the declarations or types, so later in the FEs and +middle-end it is fine to use lookup_attribute etc. which just check the +attribute name and not namespace because non-standard non-GNU attributes +just won't show there. But in the case of attributes on statements, nothing +has filtered out the unknown attributes, so with my earlier assume +attribute patch e.g. c-c++-common/Wno-attributes-6.c test failed because +it uses: +[[vendor::assume(1 + 1 == 2)]]; +with -Wno-attributes=vendor::assume and lookup_attribute ("assume", ) +finds such attribute and handled it that way. +So, for those cases, this patch introduces lookup_attribute and +remove_attribute overloads which specify also the namespace. +I think the fallthrough, hot, cold, likely, unlikely attribute handling +will need to use the new APIs too, so that we don't handle +[[msft::fallthrough]] attribute as something we'd know. + +2022-10-04 Jakub Jelinek <jakub@redhat.com> + + * attribs.h (remove_attribute): Declare overload with additional + attr_ns argument. + (private_lookup_attribute): Declare overload with additional + attr_ns and attr_ns_len arguments. + (lookup_attribute): New overload with additional attr_ns argument. + * attribs.cc (remove_attribute): New overload with additional + attr_ns argument. + (private_lookup_attribute): New overload with additional + attr_ns and attr_ns_len arguments. +--- + gcc/attribs.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ + gcc/attribs.h | 38 ++++++++++++++++++++++++++++++ + 2 files changed, 101 insertions(+) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index 656ea739e..8e2696bc5 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -1598,6 +1598,36 @@ remove_attribute (const char *attr_name, tree list) + return list; + } + ++/* Similarly but also match namespace on the removed attributes. */ ++ ++tree ++remove_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ tree *p; ++ gcc_checking_assert (attr_name[0] != '_'); ++ gcc_checking_assert (attr_ns == NULL || attr_ns[0] != '_'); ++ ++ for (p = &list; *p;) ++ { ++ tree l = *p; ++ ++ tree attr = get_attribute_name (l); ++ if (is_attribute_p (attr_name, attr)) ++ { ++ tree ns = get_attribute_namespace (l); ++ if ((ns == NULL_TREE && attr_ns == NULL) ++ || (ns && attr_ns && is_attribute_p (attr_ns, ns))) ++ { ++ *p = TREE_CHAIN (l); ++ continue; ++ } ++ } ++ p = &TREE_CHAIN (l); ++ } ++ ++ return list; ++} ++ + /* Return an attribute list that is the union of a1 and a2. */ + + tree +@@ -1995,6 +2025,39 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) + return list; + } + ++/* Similarly but with also attribute namespace. 
*/ ++ ++tree ++private_lookup_attribute (const char *attr_ns, const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, tree list) ++{ ++ while (list) ++ { ++ tree attr = get_attribute_name (list); ++ size_t ident_len = IDENTIFIER_LENGTH (attr); ++ if (cmp_attribs (attr_name, attr_len, IDENTIFIER_POINTER (attr), ++ ident_len)) ++ { ++ tree ns = get_attribute_namespace (list); ++ if (ns == NULL_TREE) ++ { ++ if (attr_ns == NULL) ++ break; ++ } ++ else if (attr_ns) ++ { ++ ident_len = IDENTIFIER_LENGTH (ns); ++ if (cmp_attribs (attr_ns, attr_ns_len, IDENTIFIER_POINTER (ns), ++ ident_len)) ++ break; ++ } ++ } ++ list = TREE_CHAIN (list); ++ } ++ ++ return list; ++} ++ + /* Return true if the function decl or type NODE has been declared + with attribute ANAME among attributes ATTRS. */ + +diff --git a/gcc/attribs.h b/gcc/attribs.h +index 0856f98fb..9ad530fcb 100644 +--- a/gcc/attribs.h ++++ b/gcc/attribs.h +@@ -88,6 +88,10 @@ extern tree merge_type_attributes (tree, tree); + + extern tree remove_attribute (const char *, tree); + ++/* Similarly but also with specific attribute namespace. */ ++ ++extern tree remove_attribute (const char *, const char *, tree); ++ + /* Given two attributes lists, return a list of their union. */ + + extern tree merge_attributes (tree, tree); +@@ -119,6 +123,10 @@ extern int attribute_list_contained (const_tree, const_tree); + for size. */ + extern tree private_lookup_attribute (const char *attr_name, size_t attr_len, + tree list); ++extern tree private_lookup_attribute (const char *attr_ns, ++ const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, ++ tree list); + + extern unsigned decls_mismatched_attributes (tree, tree, tree, + const char* const, +@@ -215,6 +223,36 @@ lookup_attribute (const char *attr_name, tree list) + } + } + ++/* Similar to lookup_attribute, but also match the attribute namespace. */ ++ ++static inline tree ++lookup_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ if (CHECKING_P && attr_name[0] != '_') ++ { ++ size_t attr_len = strlen (attr_name); ++ gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len)); ++ } ++ if (CHECKING_P && attr_ns && attr_ns[0] != '_') ++ { ++ size_t attr_ns_len = strlen (attr_ns); ++ gcc_checking_assert (!canonicalize_attr_name (attr_ns, attr_ns_len)); ++ } ++ /* In most cases, list is NULL_TREE. */ ++ if (list == NULL_TREE) ++ return NULL_TREE; ++ else ++ { ++ size_t attr_ns_len = attr_ns ? strlen (attr_ns) : 0; ++ size_t attr_len = strlen (attr_name); ++ /* Do the strlen() before calling the out-of-line implementation. ++ In most cases attr_name is a string constant, and the compiler ++ will optimize the strlen() away. */ ++ return private_lookup_attribute (attr_ns, attr_name, ++ attr_ns_len, attr_len, list); ++ } ++} ++ + /* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list first element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not +-- +2.33.0 +
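With the overloads in place, callers can match on both namespace and name. A minimal usage sketch (the attribute names are illustrative, not from the patch):

/* Matches assume only in the gnu namespace; an unrelated
   vendor::assume in ATTRS is no longer found.  */
tree a = lookup_attribute ("gnu", "assume", attrs);

/* Strips vendor::mode but keeps gnu::mode; per the code above, a
   NULL attr_ns would instead match only non-scoped attributes.  */
attrs = remove_attribute ("vendor", "mode", attrs);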
View file
_service:tar_scm:0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
Added
@@ -0,0 +1,47 @@ +From 044dc671f7eb723df5b6ce2364d6ae579c0cc984 Mon Sep 17 00:00:00 2001 +From: Martin Jambor <mjambor@suse.cz> +Date: Tue, 30 Aug 2022 18:50:35 +0200 +Subject: [PATCH 129/157] [Backport][SME] vec: Add array_slice constructors + from non-const and gc vectors + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=15433c214df295f2281a90fcf283355b21beca0e + +This patch adds constructors of array_slice that are required to +create them from non-const (heap or auto) vectors or from GC vectors. + +gcc/ChangeLog: + +2022-08-08 Martin Jambor <mjambor@suse.cz> + + * vec.h (array_slice): Add constructors for non-const reference to + heap vector and pointers to heap vectors. +--- + gcc/vec.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/gcc/vec.h b/gcc/vec.h +index 3ba7ea7ed..fc3b10c85 100644 +--- a/gcc/vec.h ++++ b/gcc/vec.h +@@ -2264,6 +2264,18 @@ public: + array_slice (const vec<OtherT> &v) + : m_base (v.address ()), m_size (v.length ()) {} + ++ template<typename OtherT> ++ array_slice (vec<OtherT> &v) ++ : m_base (v.address ()), m_size (v.length ()) {} ++ ++ template<typename OtherT> ++ array_slice (const vec<OtherT, va_gc> *v) ++ : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} ++ ++ template<typename OtherT> ++ array_slice (vec<OtherT, va_gc> *v) ++ : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} ++ + iterator begin () { return m_base; } + iterator end () { return m_base + m_size; } + +-- +2.33.0 +
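A minimal sketch of what the new constructors permit (process is a hypothetical consumer of array_slice, not part of the patch):

static void process (array_slice<tree> ops);  /* hypothetical */

static void
caller (vec<tree> &heap_vec, vec<tree, va_gc> *gc_vec)
{
  process (heap_vec);  /* non-const heap vector: new overload */
  process (gc_vec);    /* GC vector: new overload; a null pointer
                          becomes an empty slice */
}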
View file
_service:tar_scm:0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
Added
@@ -0,0 +1,140 @@ +From 12dd36f06e13ee9cd684c00732caa684f49b3610 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Jun 2023 08:48:17 +0100 +Subject: [PATCH 130/157] [Backport][SME] A couple of va_gc_atomic tweaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4e9f6c14280699997a633cefd3fb315b2bd4762c + +The only current user of va_gc_atomic is Ada's: + + vec<Entity_Id, va_gc_atomic> + +It uses the generic gt_pch_nx routines (with gt_pch_nx being the +“note pointers” hooks), such as: + + template<typename T, typename A> + void + gt_pch_nx (vec<T, A, vl_embed> *v) + { + extern void gt_pch_nx (T &); + for (unsigned i = 0; i < v->length (); i++) + gt_pch_nx ((*v)[i]); + } + +It then defines gt_pch_nx routines for Entity_Id &. + +The problem is that if we wanted to take the same approach for +an array of unsigned ints, we'd need to define: + + inline void gt_pch_nx (unsigned int &) { } + +which would then be ambiguous with: + + inline void gt_pch_nx (unsigned int) { } + +The point of va_gc_atomic is that the elements don't need to be GCed, +and so we have: + + template<typename T> + void + gt_ggc_mx (vec<T, va_gc_atomic, vl_embed> *v ATTRIBUTE_UNUSED) + { + /* Nothing to do. Vectors of atomic types wrt GC do not need to + be traversed. */ + } + +I think it's therefore reasonable to assume that no pointers will +need to be processed for PCH either. + +The patch also relaxes the array_slice constructor for vec<T, va_gc> * +so that it handles all embedded vectors. + +gcc/ + * vec.h (gt_pch_nx): Add overloads for va_gc_atomic. + (array_slice): Relax va_gc constructor to handle all vectors + with a vl_embed layout. + +gcc/ada/ + * gcc-interface/decl.cc (gt_pch_nx): Remove overloads for Entity_Id. +--- + gcc/ada/gcc-interface/decl.cc | 11 ----------- + gcc/vec.h | 22 ++++++++++++++++++---- + 2 files changed, 18 insertions(+), 15 deletions(-) + +diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc +index 1c7a71684..7193b55c7 100644 +--- a/gcc/ada/gcc-interface/decl.cc ++++ b/gcc/ada/gcc-interface/decl.cc +@@ -163,17 +163,6 @@ struct GTY((for_user)) tree_entity_vec_map + vec<Entity_Id, va_gc_atomic> *to; + }; + +-void +-gt_pch_nx (Entity_Id &) +-{ +-} +- +-void +-gt_pch_nx (Entity_Id *x, gt_pointer_operator op, void *cookie) +-{ +- op (x, NULL, cookie); +-} +- + struct dummy_type_hasher : ggc_cache_ptr_hash<tree_entity_vec_map> + { + static inline hashval_t +diff --git a/gcc/vec.h b/gcc/vec.h +index fc3b10c85..592d3f7e0 100644 +--- a/gcc/vec.h ++++ b/gcc/vec.h +@@ -1383,6 +1383,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v) + gt_pch_nx ((*v)[i]); + } + ++template<typename T> ++void ++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *) ++{ ++ /* No pointers to note. */ ++} ++ + template<typename T, typename A> + void + gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie) +@@ -1400,6 +1407,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie) + gt_pch_nx (&((*v)[i]), op, cookie); + } + ++template<typename T> ++void ++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *) ++{ ++ /* No pointers to note. */ ++} ++ + + /* Space efficient vector. These vectors can grow dynamically and are + allocated together with their control data. 
They are suited to be +@@ -2268,12 +2282,12 @@ public: + array_slice (vec<OtherT> &v) + : m_base (v.address ()), m_size (v.length ()) {} + +- template<typename OtherT> +- array_slice (const vec<OtherT, va_gc> *v) ++ template<typename OtherT, typename A> ++ array_slice (const vec<OtherT, A, vl_embed> *v) + : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} + +- template<typename OtherT> +- array_slice (vec<OtherT, va_gc> *v) ++ template<typename OtherT, typename A> ++ array_slice (vec<OtherT, A, vl_embed> *v) + : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} + + iterator begin () { return m_base; } +-- +2.33.0 +
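With the no-op gt_pch_nx overloads above, a GC-managed vector of a pointer-free element type needs no hand-written note routines. A minimal sketch (the structure is hypothetical, mirroring the unsigned-int case from the commit message):

struct GTY(()) id_list
{
  /* The elements contain no GC pointers, so both the GC marker and
     the PCH walker have nothing to do per element.  */
  vec<unsigned int, va_gc_atomic> *ids;
};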
View file
_service:tar_scm:0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
Added
@@ -0,0 +1,34 @@ +From bb15d4c4476e3ba303c5afe0adae0d86ab5f0a9b Mon Sep 17 00:00:00 2001 +From: zhongjuzhe <juzhe.zhong@rivai.ai> +Date: Mon, 22 Aug 2022 10:15:31 +0100 +Subject: [PATCH 131/157] [Backport][SME] middle-end: Fix issue of poly_uint16 + (1, 1) in self test + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=21e7d87a901d45f0cb5e5510d22bfbdb0d0ac6a1 + +This patch fix issue of poly_uint16 (1, 1) in machine mode self test. + +gcc/ChangeLog: + + * simplify-rtx.cc (test_vector_subregs_fore_back): Make first value + and repeat value different. +--- + gcc/simplify-rtx.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc +index e152918b0..fc79a2e2e 100644 +--- a/gcc/simplify-rtx.cc ++++ b/gcc/simplify-rtx.cc +@@ -8380,7 +8380,7 @@ test_vector_subregs_fore_back (machine_mode inner_mode) + for (unsigned int i = 0; i < count; ++i) + builder.quick_push (gen_int_mode (i, int_mode)); + for (unsigned int i = 0; i < count; ++i) +- builder.quick_push (gen_int_mode (-(int) i, int_mode)); ++ builder.quick_push (gen_int_mode (-1 - (int) i, int_mode)); + rtx x = builder.build (); + + test_vector_subregs_modes (x); +-- +2.33.0 +
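Concretely, taking count == 2, the values pushed into the builder are:

int fore[2]        = { 0, 1 };   /* first loop: i */
int back_before[2] = { 0, -1 };  /* old second loop: -(int) i */
int back_after[2]  = { -1, -2 }; /* new second loop: -1 - (int) i */

With the old values the "back" half started with the same value (0) as the "fore" half, which a variable-length (poly_uint16 (1, 1)) encoding can mistake for a repetition; the new values keep the two halves distinct.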
View file
_service:tar_scm:0231-SME-Add-missing-header-file-in-aarch64.cc.patch
Added
@@ -0,0 +1,24 @@ +From cce05b3365c3986ca74c04f442662a21b4f03a61 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Mon, 4 Mar 2024 14:39:36 +0800 +Subject: [PATCH 132/157] [SME] Add missing header file in `aarch64.cc` + +--- + gcc/config/aarch64/aarch64.cc | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index f7285555b..0117a3e12 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -86,6 +86,7 @@ + #include "tree-pass.h" + #include "cfgbuild.h" + #include "symbol-summary.h" ++#include "value-range.h" + #include "ipa-prop.h" + #include "ipa-fnsummary.h" + +-- +2.33.0 +
View file
_service:tar_scm:0232-Backport-SME-c-Add-support-for-__extension__.patch
Added
@@ -0,0 +1,327 @@ +From 3714cfb47fafef884aa2ff330935fb44b7966909 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 24 Aug 2023 11:49:58 +0100 +Subject: [PATCH 133/157] [Backport][SME] c: Add support for [[__extension__ + ...]] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=207a5daa9dcf31e367152163ad2a2ab4a0858967 + +[[]] attributes are a recent addition to C, but as a GNU extension, +GCC allows them to be used in C11 and earlier. Normally this use +would trigger a pedwarn (for -pedantic, -Wc11-c2x-compat, etc.). + +This patch allows the pedwarn to be suppressed by starting the +attribute-list with __extension__. + +Also, :: is not a single lexing token prior to C2X, so it wasn't +possible to use scoped attributes in C11, even as a GNU extension. +The patch allows two colons to be used in place of :: when +__extension__ is used. No attempt is made to check whether the +two colons are immediately adjacent. + +gcc/ + * doc/extend.texi: Document the C [[__extension__ ...]] construct. + +gcc/c/ + * c-parser.cc (c_parser_std_attribute): Conditionally allow + two colons to be used in place of ::. + (c_parser_std_attribute_list): New function, split out from... + (c_parser_std_attribute_specifier): ...here. Allow the attribute-list + to start with __extension__. When it does, also allow two colons + to be used in place of ::. + +gcc/testsuite/ + * gcc.dg/c2x-attr-syntax-6.c: New test. + * gcc.dg/c2x-attr-syntax-7.c: Likewise. +--- + gcc/c/c-parser.cc | 64 ++++++++++++++++++------ + gcc/doc/extend.texi | 27 ++++++++-- + gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 62 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 60 ++++++++++++++++++++++ + 4 files changed, 193 insertions(+), 20 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c + +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 78a313fe3..486f46e1c 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -4894,10 +4894,18 @@ c_parser_balanced_token_sequence (c_parser *parser) + ( balanced-token-sequence[opt] ) + + Keywords are accepted as identifiers for this purpose. +-*/ ++ ++ As an extension, we permit an attribute-specifier to be: ++ ++ [ [ __extension__ attribute-list ] ] ++ ++ Two colons are then accepted as a synonym for ::. No attempt is made ++ to check whether the colons are immediately adjacent. LOOSE_SCOPE_P ++ indicates whether this relaxation is in effect. 
*/ + + static tree +-c_parser_std_attribute (c_parser *parser, bool for_tm) ++c_parser_std_attribute (c_parser *parser, bool for_tm, ++ bool loose_scope_p = false) + { + c_token *token = c_parser_peek_token (parser); + tree ns, name, attribute; +@@ -4910,9 +4918,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) + } + name = canonicalize_attr_name (token->value); + c_parser_consume_token (parser); +- if (c_parser_next_token_is (parser, CPP_SCOPE)) ++ if (c_parser_next_token_is (parser, CPP_SCOPE) ++ || (loose_scope_p ++ && c_parser_next_token_is (parser, CPP_COLON) ++ && c_parser_peek_2nd_token (parser)->type == CPP_COLON)) + { + ns = name; ++ if (c_parser_next_token_is (parser, CPP_COLON)) ++ c_parser_consume_token (parser); + c_parser_consume_token (parser); + token = c_parser_peek_token (parser); + if (token->type != CPP_NAME && token->type != CPP_KEYWORD) +@@ -4981,19 +4994,9 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) + } + + static tree +-c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) ++c_parser_std_attribute_list (c_parser *parser, bool for_tm, ++ bool loose_scope_p = false) + { +- location_t loc = c_parser_peek_token (parser)->location; +- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) +- return NULL_TREE; +- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) +- { +- c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); +- return NULL_TREE; +- } +- if (!for_tm) +- pedwarn_c11 (loc, OPT_Wpedantic, +- "ISO C does not support %<[[]]%> attributes before C2X"); + tree attributes = NULL_TREE; + while (true) + { +@@ -5005,7 +5008,7 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) + c_parser_consume_token (parser); + continue; + } +- tree attribute = c_parser_std_attribute (parser, for_tm); ++ tree attribute = c_parser_std_attribute (parser, for_tm, loose_scope_p); + if (attribute != error_mark_node) + { + TREE_CHAIN (attribute) = attributes; +@@ -5014,6 +5017,35 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) + if (c_parser_next_token_is_not (parser, CPP_COMMA)) + break; + } ++ return attributes; ++} ++ ++static tree ++c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) ++{ ++ location_t loc = c_parser_peek_token (parser)->location; ++ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) ++ return NULL_TREE; ++ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) ++ { ++ c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); ++ return NULL_TREE; ++ } ++ tree attributes; ++ if (c_parser_next_token_is_keyword (parser, RID_EXTENSION)) ++ { ++ auto ext = disable_extension_diagnostics (); ++ c_parser_consume_token (parser); ++ attributes = c_parser_std_attribute_list (parser, for_tm, true); ++ restore_extension_diagnostics (ext); ++ } ++ else ++ { ++ if (!for_tm) ++ pedwarn_c11 (loc, OPT_Wpedantic, ++ "ISO C does not support %<[[]]%> attributes before C2X"); ++ attributes = c_parser_std_attribute_list (parser, for_tm); ++ } + c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); + c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); + return nreverse (attributes); +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 674db2f1a..3cfecee53 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11726,10 +11726,29 @@ macros to replace them with the customary keywords. 
It looks like this: + @findex __extension__ + @opindex pedantic + @option{-pedantic} and other options cause warnings for many GNU C extensions. +-You can +-prevent such warnings within one expression by writing +-@code{__extension__} before the expression. @code{__extension__} has no +-effect aside from this. ++You can suppress such warnings using the keyword @code{__extension__}. ++Specifically: ++ ++@itemize @bullet ++@item ++Writing @code{__extension__} before an expression prevents warnings ++about extensions within that expression. ++ ++@item ++In C, writing: ++ ++@smallexample ++[[__extension__ @dots{}]] ++@end smallexample ++ ++suppresses warnings about using @samp{[[]]} attributes in C versions ++that predate C2X@. Since the scope token @samp{::} is not a single ++lexing token in earlier versions of C, this construct also allows two colons ++to be used in place of @code{::}. GCC does not check whether the two ++colons are immediately adjacent. ++@end itemize ++ ++@code{__extension__} has no effect aside from this. + + @node Incomplete Enums + @section Incomplete @code{enum} Types +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +new file mode 100644 +index 000000000..9e5f65ce4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +@@ -0,0 +1,62 @@ ++/* Test C2x attribute syntax: use of __extension__ in C11 mode. */ ++/* { dg-do compile } */ ++/* { dg-options "-std=c11 -pedantic-errors" } */
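A minimal sketch of the accepted forms under -std=c11 -pedantic-errors (the vector_size attribute is just an illustration; the new tests exercise similar shapes):

/* Quietly accepted in C11 thanks to __extension__.  */
typedef int [[__extension__ gnu::vector_size (16)]] v16a;
/* Two separate colons allowed in place of ::, since :: is not a
   single lexing token before C2X.  */
typedef int [[__extension__ gnu : : vector_size (16)]] v16b;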
View file
_service:tar_scm:0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
Added
@@ -0,0 +1,140 @@ +From 29a71fc5cbfc3b5e4649abf51740daed5ea243bd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:20:55 +0000 +Subject: [PATCH 134/157] [Backport][SME] lra: Updates of biggest mode for hard + regs [PR112278] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6e2e0ce6795c863e295eb33559f8dc0500297da3 + +LRA keeps track of the biggest mode for both hard registers and +pseudos. The updates assume that the modes are ordered, i.e. that +we can tell whether one is no bigger than the other at compile time. + +That is (or at least seemed to be) a reasonable restriction for pseudos. +But it isn't necessarily so for hard registers, since the uses of hard +registers can be logically distinct. The testcase is an example of this. + +The biggest mode of hard registers is also special for other reasons. +As the existing comment says: + + /* A reg can have a biggest_mode of VOIDmode if it was only ever seen as + part of a multi-word register. In that case, just use the reg_rtx + mode. Do the same also if the biggest mode was larger than a register + or we can not compare the modes. Otherwise, limit the size to that of + the biggest access in the function or to the natural mode at least. */ + +This patch applies the same approach to the updates. + +gcc/ + PR rtl-optimization/112278 + * lra-int.h (lra_update_biggest_mode): New function. + * lra-coalesce.cc (merge_pseudos): Use it. + * lra-lives.cc (process_bb_lives): Likewise. + * lra.cc (new_insn_reg): Likewise. + +gcc/testsuite/ + PR rtl-optimization/112278 + * gcc.target/aarch64/sve/pr112278.c: New test. +--- + gcc/lra-coalesce.cc | 4 +--- + gcc/lra-int.h | 15 +++++++++++++++ + gcc/lra-lives.cc | 4 +--- + gcc/lra.cc | 5 ++--- + gcc/testsuite/gcc.target/aarch64/sve/pr112278.c | 15 +++++++++++++++ + 5 files changed, 34 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr112278.c + +diff --git a/gcc/lra-coalesce.cc b/gcc/lra-coalesce.cc +index c82934569..901a44663 100644 +--- a/gcc/lra-coalesce.cc ++++ b/gcc/lra-coalesce.cc +@@ -112,9 +112,7 @@ merge_pseudos (int regno1, int regno2) + = (lra_merge_live_ranges + (lra_reg_info[first].live_ranges, + lra_copy_live_range_list (lra_reg_info[first2].live_ranges))); +- if (partial_subreg_p (lra_reg_info[first].biggest_mode, +- lra_reg_info[first2].biggest_mode)) +- lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode; ++ lra_update_biggest_mode (first, lra_reg_info[first2].biggest_mode); + } + + /* Change pseudos in *LOC on their coalescing group +diff --git a/gcc/lra-int.h b/gcc/lra-int.h +index 04baefef3..040e87d11 100644 +--- a/gcc/lra-int.h ++++ b/gcc/lra-int.h +@@ -525,4 +525,19 @@ lra_assign_reg_val (int from, int to) + lra_reg_info[to].offset = lra_reg_info[from].offset; + } + ++/* Update REGNO's biggest recorded mode so that it includes a reference ++ in mode MODE. 
*/ ++inline void ++lra_update_biggest_mode (int regno, machine_mode mode) ++{ ++ if (!ordered_p (GET_MODE_SIZE (lra_reg_info[regno].biggest_mode), ++ GET_MODE_SIZE (mode))) ++ { ++ gcc_checking_assert (HARD_REGISTER_NUM_P (regno)); ++ lra_reg_info[regno].biggest_mode = reg_raw_mode[regno]; ++ } ++ else if (partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) ++ lra_reg_info[regno].biggest_mode = mode; ++} ++ + #endif /* GCC_LRA_INT_H */ +diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc +index a755464ee..fb4a12304 100644 +--- a/gcc/lra-lives.cc ++++ b/gcc/lra-lives.cc +@@ -770,9 +770,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + { + int regno = reg->regno; + +- if (partial_subreg_p (lra_reg_info[regno].biggest_mode, +- reg->biggest_mode)) +- lra_reg_info[regno].biggest_mode = reg->biggest_mode; ++ lra_update_biggest_mode (regno, reg->biggest_mode); + if (HARD_REGISTER_NUM_P (regno)) + lra_hard_reg_usage[regno] += freq; + } +diff --git a/gcc/lra.cc b/gcc/lra.cc +index 1444cb759..8fda432f1 100644 +--- a/gcc/lra.cc ++++ b/gcc/lra.cc +@@ -559,9 +559,8 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type, + lra_insn_reg *ir = lra_insn_reg_pool.allocate (); + ir->type = type; + ir->biggest_mode = mode; +- if (NONDEBUG_INSN_P (insn) +- && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) +- lra_reg_info[regno].biggest_mode = mode; ++ if (NONDEBUG_INSN_P (insn)) ++ lra_update_biggest_mode (regno, mode); + ir->subreg_p = subreg_p; + ir->early_clobber_alts = early_clobber_alts; + ir->regno = regno; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c +new file mode 100644 +index 000000000..4f56add2b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c +@@ -0,0 +1,15 @@ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++void ++f (void) ++{ ++ { ++ register svint8_t v0 asm ("z0"); ++ asm volatile ("" : "=w" (v0)); ++ } ++ { ++ register int8x8x4_t v0 asm ("v0"); ++ asm volatile ("" : "=w" (v0)); ++ } ++} +-- +2.33.0 +
View file
_service:tar_scm:0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
Added
@@ -0,0 +1,672 @@ +From 0a34bb6b18cdf34cb9d4f34b1697e1bcfcff139b Mon Sep 17 00:00:00 2001 +From: Joseph Myers <joseph@codesourcery.com> +Date: Thu, 25 Aug 2022 21:02:57 +0000 +Subject: [PATCH 135/157] [Backport][SME] c: Support C2x empty initializer + braces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=14cfa01755a66afbae2539f8b5796c960ddcecc6 + +ISO C2x standardizes empty initializer braces {}. Implement this +feature accordingly. The basic case was already supported and so just +needed diagnostic adjustments. However, the standard feature also +includes two cases that were not previously supported: empty +initializer braces for scalars, and empty initializer braces for +VLAs. Thus, add support for those features as well, updating existing +tests that expected them to be diagnosed. + +There was already some gimplifier support for converting +variable-sized initializations with empty CONSTRUCTORs to memset. +However, it didn't apply here; code earlier in gimplify_modify_expr +ended up calling gimplify_init_constructor via +gimplify_modify_expr_rhs, which ended up handling the CONSTRUCTOR in a +way that generated an ICE later. Add a check for this case earlier in +gimplify_modify_expr to avoid that issue. + +Bootstrapped with no regressions for x86_64-pc-linux-gnu. + +gcc/ + * gimplify.cc (gimplify_modify_expr): Convert initialization from + a variable-size CONSTRUCTOR to memset before call to + gimplify_modify_expr_rhs. + +gcc/c/ + * c-decl.cc (start_decl): Do not diagnose initialization of + variable-sized objects here. + * c-parser.cc (c_parser_braced_init): Add argument DECL. All + callers changed. + (c_parser_initializer): Diagnose initialization of variable-sized + objects other than with braced initializer. + (c_parser_braced_init): Use pedwarn_c11 for empty initializer + braces and update diagnostic text. Diagnose initialization of + variable-sized objects with nonempty braces. + * c-typeck.cc (digest_init): Update diagnostic for initialization + of variable-sized objects. + (really_start_incremental_init, set_designator) + (process_init_element): Update comments. + (pop_init_level): Allow scalar empty initializers. + +gcc/testsuite/ + * gcc.dg/c11-empty-init-1.c, gcc.dg/c11-empty-init-2.c, + gcc.dg/c11-empty-init-3.c, gcc.dg/c2x-empty-init-1.c, + gcc.dg/c2x-empty-init-2.c, gcc.dg/c2x-empty-init-3.c, + gcc.dg/gnu2x-empty-init-1.c, gcc.dg/gnu2x-empty-init-2.c: New + tests. + * gcc.dg/torture/dfp-default-init-1.c: Also test empty + initializers. + * gcc.dg/init-bad-1.c, gcc.dg/noncompile/pr71583.c, + gcc.dg/pr61096-1.c, gcc.dg/vla-init-2.c, gcc.dg/vla-init-3.c, + gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Update expected + diagnostics. + * gcc.dg/ubsan/c-shift-1.c: Use nonempty initializers for VLA + initializations expected to be diagnosed. 
+--- + gcc/c/c-decl.cc | 20 +----- + gcc/c/c-parser.cc | 24 +++++-- + gcc/c/c-typeck.cc | 23 ++++--- + gcc/gimplify.cc | 15 +++++ + gcc/testsuite/gcc.dg/c11-empty-init-1.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-2.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-1.c | 80 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-2.c | 18 +++++ + gcc/testsuite/gcc.dg/c2x-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c | 29 ++++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c | 16 +++++ + gcc/testsuite/gcc.dg/init-bad-1.c | 3 +- + gcc/testsuite/gcc.dg/noncompile/pr71583.c | 2 +- + gcc/testsuite/gcc.dg/pr61096-1.c | 2 +- + gcc/testsuite/gcc.dg/ubsan/c-shift-1.c | 12 ++-- + gcc/testsuite/gcc.dg/vla-init-2.c | 1 - + gcc/testsuite/gcc.dg/vla-init-3.c | 1 - + 18 files changed, 301 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c + +diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc +index 9d87a8cdb..685bb1757 100644 +--- a/gcc/c/c-decl.cc ++++ b/gcc/c/c-decl.cc +@@ -5166,29 +5166,15 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, + initialized = false; + else if (COMPLETE_TYPE_P (TREE_TYPE (decl))) + { +- /* A complete type is ok if size is fixed. */ +- +- if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) +- || C_DECL_VARIABLE_SIZE (decl)) +- { +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } ++ /* A complete type is ok if size is fixed. If the size is ++ variable, an empty initializer is OK and nonempty ++ initializers will be diagnosed in the parser. */ + } + else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) + { + error ("variable %qD has initializer but incomplete type", decl); + initialized = false; + } +- else if (C_DECL_VARIABLE_SIZE (decl)) +- { +- /* Although C99 is unclear about whether incomplete arrays +- of VLAs themselves count as VLAs, it does not make +- sense to permit them to be initialized given that +- ordinary VLAs may not be initialized. 
*/ +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } + } + + if (initialized) +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 486f46e1c..6db535d11 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -1515,7 +1515,7 @@ static tree c_parser_simple_asm_expr (c_parser *); + static tree c_parser_gnu_attributes (c_parser *); + static struct c_expr c_parser_initializer (c_parser *, tree); + static struct c_expr c_parser_braced_init (c_parser *, tree, bool, +- struct obstack *); ++ struct obstack *, tree); + static void c_parser_initelt (c_parser *, struct obstack *); + static void c_parser_initval (c_parser *, struct c_expr *, + struct obstack *); +@@ -5247,11 +5247,15 @@ static struct c_expr + c_parser_initializer (c_parser *parser, tree decl) + { + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE)) +- return c_parser_braced_init (parser, NULL_TREE, false, NULL); ++ return c_parser_braced_init (parser, NULL_TREE, false, NULL, decl); + else + { + struct c_expr ret; + location_t loc = c_parser_peek_token (parser)->location; ++ if (decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + ret = c_parser_expr_no_commas (parser, NULL); + /* This is handled mostly by gimplify.cc, but we have to deal with + not warning about int x = x; as it is a GCC extension to turn off +@@ -5278,11 +5282,12 @@ location_t last_init_list_comma; + compound literal, and NULL_TREE for other initializers and for + nested braced lists. NESTED_P is true for nested braced lists, + false for the list of a compound literal or the list that is the +- top-level initializer in a declaration. */ ++ top-level initializer in a declaration. DECL is the declaration for ++ the top-level initializer for a declaration, otherwise NULL_TREE. */ + + static struct c_expr + c_parser_braced_init (c_parser *parser, tree type, bool nested_p, +- struct obstack *outer_obstack) ++ struct obstack *outer_obstack, tree decl) + { + struct c_expr ret; + struct obstack braced_init_obstack; +@@ -5300,10 +5305,15 @@ c_parser_braced_init (c_parser *parser, tree type, bool nested_p, + really_start_incremental_init (type); + if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE)) + { +- pedwarn (brace_loc, OPT_Wpedantic, "ISO C forbids empty initializer braces"); ++ pedwarn_c11 (brace_loc, OPT_Wpedantic, ++ "ISO C forbids empty initializer braces before C2X"); + } + else + { ++ if (decl && decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (brace_loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + /* Parse a non-empty initializer list, possibly with a trailing + comma. */ + while (true) +@@ -5559,7 +5569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, + + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) + init = c_parser_braced_init (parser, NULL_TREE, true, +- braced_init_obstack); ++ braced_init_obstack, NULL_TREE); + else + { + init = c_parser_expr_no_commas (parser, after); +@@ -10312,7 +10322,7 @@ c_parser_postfix_expression_after_paren_type (c_parser *parser,
View file
_service:tar_scm:0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
Added
@@ -0,0 +1,115 @@ +From 67001778883e10110c505dd8876a447a19d1ac5e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 31 Aug 2022 15:39:27 +0100 +Subject: [PATCH 136/157] [Backport][SME] aarch64: Update sizeless tests for + recent GNU C changes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de9805c08121a84ce368dccfe043a3f44c3ff13b + +The tests for sizeless SVE types include checks that the types +are handled for initialisation purposes in the same way as scalars. +GNU C and C2x now allow scalars to be initialised using empty braces, +so this patch updates the SVE tests to match. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c: Update + tests for empty initializers. + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise. +--- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-2.c | 4 ++-- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +index 285751eeb..9db953583 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. */ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +index 306fd4780..c05b16406 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. 
*/ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +index 7fc51e7ad..4b34a71c1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +index c575492c1..34dfd598e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +-- +2.33.0 +
View file
_service:tar_scm:0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
Added
@@ -0,0 +1,58 @@ +From dbe5a29054d4eb1e0f5173c8f2291569eac71c96 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 2 Dec 2023 13:49:55 +0000 +Subject: [PATCH 137/157] [Backport][SME] attribs: Namespace-aware + lookup_attribute_spec + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df4643f90c45db2501c731d4fded60dc1426b484 + +attribute_ignored_p already used a namespace-aware query +to find the attribute_spec for an existing attribute: + + const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); + +This patch does the same for other callers in the file. + +gcc/ + * attribs.cc (comp_type_attributes): Pass the full TREE_PURPOSE + to lookup_attribute_spec, rather than just the name. + (remove_attributes_matching): Likewise. +--- + gcc/attribs.cc | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index 8e2696bc5..1dbc30a95 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -1417,7 +1417,7 @@ comp_type_attributes (const_tree type1, const_tree type2) + const struct attribute_spec *as; + const_tree attr; + +- as = lookup_attribute_spec (get_attribute_name (a)); ++ as = lookup_attribute_spec (TREE_PURPOSE (a)); + if (!as || as->affects_type_identity == false) + continue; + +@@ -1431,7 +1431,7 @@ comp_type_attributes (const_tree type1, const_tree type2) + { + const struct attribute_spec *as; + +- as = lookup_attribute_spec (get_attribute_name (a)); ++ as = lookup_attribute_spec (TREE_PURPOSE (a)); + if (!as || as->affects_type_identity == false) + continue; + +@@ -1473,8 +1473,7 @@ remove_attributes_matching (tree attrs, Predicate predicate) + const_tree start = attrs; + for (const_tree attr = attrs; attr; attr = TREE_CHAIN (attr)) + { +- tree name = get_attribute_name (attr); +- const attribute_spec *as = lookup_attribute_spec (name); ++ const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); + const_tree end; + if (!predicate (attr, as)) + end = attr; +-- +2.33.0 +
View file
_service:tar_scm:0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
Added
@@ -0,0 +1,281 @@ +From 6f42edc5035b7f7e96730dca19757b148e1be70c Mon Sep 17 00:00:00 2001 +From: Marek Polacek <polacek@redhat.com> +Date: Thu, 29 Sep 2022 17:49:32 -0400 +Subject: [PATCH 138/157] [Backport][SME] c-family: ICE with + [[gnu::nocf_check]] [PR106937] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67efffec943656a509e036cd3c785a5c3d6885e1 + +When getting the name of an attribute, we ought to use +get_attribute_name, which handles both [[]] and __attribute__(()) +forms. Failure to do so may result in an ICE, like here. + +pp_c_attributes_display wasn't able to print the [[]] form of +attributes, so this patch teaches it to. + +When printing a pointer to function with a standard attribute, the attribute +should be printed after the parameter-list. With this patch we print: + + aka 'void (*)(int) [[gnu::nocf_check]]' + +or, in C++ with noexcept: + + aka 'void (*)(int) noexcept [[gnu::nocf_check]]' + +pp_c_attributes has been unused since its introduction in r56273 so +this patch removes it. + + PR c++/106937 + +gcc/c-family/ChangeLog: + + * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU + attributes here. + (c_pretty_printer::direct_abstract_declarator): Print the standard + attributes here. + (pp_c_attributes): Remove. + (pp_c_attributes_display): Print the [[]] form if appropriate. Use + get_attribute_name. Don't print a trailing space when printing the + [[]] form. + * c-pretty-print.h (pp_c_attributes): Remove. + +gcc/cp/ChangeLog: + + * error.cc: Include "attribs.h". + (dump_type_prefix): Print only GNU attributes here. + (dump_type_suffix): Print standard attributes here. + +gcc/testsuite/ChangeLog: + + * c-c++-common/pointer-to-fn1.c: New test. +--- + gcc/c-family/c-pretty-print.cc | 96 ++++++++++++--------- + gcc/c-family/c-pretty-print.h | 1 - + gcc/cp/error.cc | 16 +++- + gcc/testsuite/c-c++-common/pointer-to-fn1.c | 18 ++++ + 4 files changed, 86 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/c-c++-common/pointer-to-fn1.c + +diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc +index 71a0cb510..4d60627b3 100644 +--- a/gcc/c-family/c-pretty-print.cc ++++ b/gcc/c-family/c-pretty-print.cc +@@ -462,7 +462,12 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) + { + pp_c_whitespace (pp); + pp_c_left_paren (pp); +- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); ++ /* If we're dealing with the GNU form of attributes, print this: ++ void (__attribute__((noreturn)) *f) (); ++ If it is the standard [[]] attribute, we'll print the attribute ++ in c_pretty_printer::direct_abstract_declarator/FUNCTION_TYPE. */ ++ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (pointee))) ++ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); + } + else if (!c_dialect_cxx ()) + pp_c_whitespace (pp); +@@ -591,6 +596,13 @@ c_pretty_printer::direct_abstract_declarator (tree t) + case FUNCTION_TYPE: + pp_c_parameter_type_list (this, t); + direct_abstract_declarator (TREE_TYPE (t)); ++ /* If this is the standard [[]] attribute, print ++ void (*)() [[noreturn]]; */ ++ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) ++ { ++ pp_space (this); ++ pp_c_attributes_display (this, TYPE_ATTRIBUTES (t)); ++ } + break; + + case ARRAY_TYPE: +@@ -845,32 +857,7 @@ c_pretty_printer::declaration (tree t) + pp_c_init_declarator (this, t); + } + +-/* Pretty-print ATTRIBUTES using GNU C extension syntax. 
*/ +- +-void +-pp_c_attributes (c_pretty_printer *pp, tree attributes) +-{ +- if (attributes == NULL_TREE) +- return; +- +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes)) +- { +- pp_tree_identifier (pp, TREE_PURPOSE (attributes)); +- if (TREE_VALUE (attributes)) +- pp_c_call_argument_list (pp, TREE_VALUE (attributes)); +- +- if (TREE_CHAIN (attributes)) +- pp_separate_with (pp, ','); +- } +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +-} +- +-/* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes +- marked to be displayed on disgnostic. */ ++/* Pretty-print ATTRIBUTES marked to be displayed on diagnostic. */ + + void + pp_c_attributes_display (c_pretty_printer *pp, tree a) +@@ -880,10 +867,12 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + if (a == NULL_TREE) + return; + ++ const bool std_p = cxx11_attribute_p (a); ++ + for (; a != NULL_TREE; a = TREE_CHAIN (a)) + { +- const struct attribute_spec *as; +- as = lookup_attribute_spec (TREE_PURPOSE (a)); ++ const struct attribute_spec *as ++ = lookup_attribute_spec (get_attribute_name (a)); + if (!as || as->affects_type_identity == false) + continue; + if (c_dialect_cxx () +@@ -891,26 +880,47 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + /* In C++ transaction_safe is printed at the end of the declarator. */ + continue; + if (is_first) +- { +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- is_first = false; +- } ++ { ++ if (std_p) ++ { ++ pp_c_left_bracket (pp); ++ pp_c_left_bracket (pp); ++ } ++ else ++ { ++ pp_c_ws_string (pp, "__attribute__"); ++ pp_c_left_paren (pp); ++ pp_c_left_paren (pp); ++ } ++ is_first = false; ++ } + else +- { +- pp_separate_with (pp, ','); +- } +- pp_tree_identifier (pp, TREE_PURPOSE (a)); ++ pp_separate_with (pp, ','); ++ tree ns; ++ if (std_p && (ns = get_attribute_namespace (a))) ++ { ++ pp_tree_identifier (pp, ns); ++ pp_colon (pp); ++ pp_colon (pp); ++ } ++ pp_tree_identifier (pp, get_attribute_name (a)); + if (TREE_VALUE (a)) +- pp_c_call_argument_list (pp, TREE_VALUE (a)); ++ pp_c_call_argument_list (pp, TREE_VALUE (a)); + } + + if (!is_first) + { +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +- pp_c_whitespace (pp); ++ if (std_p) ++ { ++ pp_c_right_bracket (pp); ++ pp_c_right_bracket (pp); ++ } ++ else ++ { ++ pp_c_right_paren (pp); ++ pp_c_right_paren (pp); ++ pp_c_whitespace (pp); ++ } + }
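The printed form can be seen with a sketch along the lines of the new test (the exact body of pointer-to-fn1.c is truncated above, so the details here are assumptions):

/* With -fcf-protection, assigning a plain function to this pointer
   type draws a diagnostic whose "aka" now prints the attribute after
   the parameter list: void (*)(int) [[gnu::nocf_check]].  */
typedef void (*fp) (int) [[gnu::nocf_check]];
void g (int);
fp p = g;  /* diagnostic mentions the attribute */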
_service:tar_scm:0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
Added
@@ -0,0 +1,35 @@
+From d13efe98cafa04aeb24f8e0f695e648887986228 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Wed, 7 Dec 2022 14:16:24 +0000
+Subject: [PATCH 139/157] [Backport][SME] AArch64: Fix assert in
+ aarch64_move_imm [PR108006]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=952c8a1dc6235dc49ab207a7f18f63d2bc97fbc9
+
+Ensure we only pass SI/DImode which fixes the assert.
+
+gcc/
+        PR target/108006
+        * config/aarch64/aarch64.cc (aarch64_expand_sve_const_vector):
+        Fix call to aarch64_move_imm to use SI/DI.
+---
+ gcc/config/aarch64/aarch64.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 0117a3e12..309ecc3d9 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -7925,7 +7925,8 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
+       /* If the integer can be moved into a general register by a
+          single instruction, do that and duplicate the result.  */
+       if (CONST_INT_P (elt_value)
+-          && aarch64_move_imm (INTVAL (elt_value), elt_mode))
++          && aarch64_move_imm (INTVAL (elt_value),
++                               encoded_bits <= 32 ? SImode : DImode))
+         {
+           elt_value = force_reg (elt_mode, elt_value);
+           return expand_vector_broadcast (mode, elt_value);
+--
+2.33.0
+
_service:tar_scm:0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
Added
@@ -0,0 +1,37 @@
+From 071f26ce18db5a09cbae0607b065028a09a856ac Mon Sep 17 00:00:00 2001
+From: Marek Polacek <polacek@redhat.com>
+Date: Tue, 11 Oct 2022 12:51:40 -0400
+Subject: [PATCH 140/157] [Backport][SME] testsuite: Only run -fcf-protection
+ test on i?86/x86_64 [PR107213]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cc694f45087c892e69ebbb177203c708f00b1bc7
+
+This test fails on non-i?86/x86_64 targets because on those targets
+we get
+
+  error: '-fcf-protection=full' is not supported for this target
+
+so this patch limits where the test is run.
+
+        PR testsuite/107213
+
+gcc/testsuite/ChangeLog:
+
+        * c-c++-common/pointer-to-fn1.c: Only run on i?86/x86_64.
+---
+ gcc/testsuite/c-c++-common/pointer-to-fn1.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c
+index 975885462..e2f948d82 100644
+--- a/gcc/testsuite/c-c++-common/pointer-to-fn1.c
++++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c
+@@ -1,4 +1,5 @@
+ /* PR c++/106937 */
++/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+ /* { dg-options "-fcf-protection" } */
+ /* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */
+ /* Test printing a pointer to function with attribute.  */
+--
+2.33.0
+
_service:tar_scm:0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
Added
@@ -0,0 +1,113 @@
+From 202ebc25e509ae0a2ac7d05c822cf6a8a817e49a Mon Sep 17 00:00:00 2001
+From: Andrew Pinski <apinski@marvell.com>
+Date: Thu, 17 Nov 2022 22:08:07 +0000
+Subject: [PATCH 141/157] [Backport][SME] Fix PRs 106764, 106765, and 107307,
+ all ICE after invalid re-declaration
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bd0c9d9e706adaeea0d96152daade0a6819a8715
+
+The problem here is the gimplifier returns GS_ERROR but
+in some cases we don't check that soon enough and try
+to do other work which could crash.
+So the fix in these two cases is to return GS_ERROR
+early if the gimplify_* functions had returned GS_ERROR.
+
+OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
+
+Thanks,
+Andrew Pinski
+
+gcc/ChangeLog:
+
+        PR c/106764
+        PR c/106765
+        PR c/107307
+        * gimplify.cc (gimplify_compound_lval): Return GS_ERROR
+        if gimplify_expr had returned GS_ERROR.
+        (gimplify_call_expr): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+        PR c/106764
+        PR c/106765
+        PR c/107307
+        * gcc.dg/redecl-19.c: New test.
+        * gcc.dg/redecl-20.c: New test.
+        * gcc.dg/redecl-21.c: New test.
+---
+ gcc/gimplify.cc                  | 5 +++++
+ gcc/testsuite/gcc.dg/redecl-19.c | 5 +++++
+ gcc/testsuite/gcc.dg/redecl-20.c | 9 +++++++++
+ gcc/testsuite/gcc.dg/redecl-21.c | 9 +++++++++
+ 4 files changed, 28 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-19.c
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-20.c
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-21.c
+
+diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
+index 91500e2fb..e9f527850 100644
+--- a/gcc/gimplify.cc
++++ b/gcc/gimplify.cc
+@@ -3272,6 +3272,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
+       tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
+                             fallback | fb_lvalue);
+       ret = MIN (ret, tret);
++      if (ret == GS_ERROR)
++        return GS_ERROR;
+
+       /* Step 2a: if we have component references we do not support on
+          registers then make sure the base isn't a register.  Of course
+@@ -3664,6 +3666,9 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value)
+     ret = gimplify_expr (&CALL_EXPR_FN (*expr_p), pre_p, NULL,
+                          is_gimple_call_addr, fb_rvalue);
+
++  if (ret == GS_ERROR)
++    return GS_ERROR;
++
+   nargs = call_expr_nargs (*expr_p);
+
+   /* Get argument types for verification.  */
+diff --git a/gcc/testsuite/gcc.dg/redecl-19.c b/gcc/testsuite/gcc.dg/redecl-19.c
+new file mode 100644
+index 000000000..cc1068544
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-19.c
+@@ -0,0 +1,5 @@
++/* We used to ICE in the gimplifier, PR 106764 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++(*a)(); // { dg-note "" }
++b(){a()} a; // { dg-error "" }
+diff --git a/gcc/testsuite/gcc.dg/redecl-20.c b/gcc/testsuite/gcc.dg/redecl-20.c
+new file mode 100644
+index 000000000..07f52115e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-20.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 107307 */
++// { dg-do compile }
++// { dg-options "-w" }
++void f ()
++{
++  const struct { int a1; } b; // { dg-note "" }
++  int *c = b.a;
++  int *b; // { dg-error "" }
++}
+diff --git a/gcc/testsuite/gcc.dg/redecl-21.c b/gcc/testsuite/gcc.dg/redecl-21.c
+new file mode 100644
+index 000000000..2f2a6548a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-21.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 106765 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++struct a {
++  int b
++} c() {
++  struct a a; // { dg-note "" }
++  a.b;
++  d a; // { dg-error "" }
+--
+2.33.0
+
_service:tar_scm:0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
Added
@@ -0,0 +1,43 @@
+From bc42a8bdab7b2ffeb81441c7c8a9a1215d8502ee Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 26 Jan 2023 15:51:00 +0000
+Subject: [PATCH 142/157] [Backport][SME] aarch64: Remove expected error for
+ compound literals
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=96fbe541481fcc7d1a8884fb8dbefd7979eb9543
+
+GCC no longer treats empty compound literals as an error
+(see 14cfa01755a66afbae2539f8b5796c960ddcecc6).
+
+gcc/testsuite/
+        * gcc.target/aarch64/bfloat16_scalar_typecheck.c: Accept empty
+        compound literals.
+---
+ gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
+index 7c9188cf2..f4ae68028 100644
+--- a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
+@@ -40,7 +40,7 @@ bfloat16_t footest (bfloat16_t scalar0)
+   short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */
+   double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */
+
+-  bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */
++  bfloat16_t scalar2_1 = {};
+   bfloat16_t scalar2_2 = { glob_bfloat };
+   bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+   bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+@@ -92,7 +92,7 @@ bfloat16_t footest (bfloat16_t scalar0)
+
+   /* Compound literals.  */
+
+-  (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */
++  (bfloat16_t) {};
+   (bfloat16_t) { glob_bfloat };
+   (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+   (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+--
+2.33.0
+
_service:tar_scm:0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
Added
@@ -0,0 +1,264 @@ +From 42bfa9a26205da222cebbe830168b6f0b5e668b4 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Jul 2022 12:59:25 +0100 +Subject: PATCH 143/157 BackportSME aarch64: Remove redundant builtins + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e80daf04c8888f527d2fc7f6cbcd1b4c853dcd04 + +aarch64_builtin_vectorized_function handles some built-in functions +that already have equivalent internal functions. This seems to be +redundant now, since the target builtins that it chooses are mapped +to the same optab patterns as the internal functions. + +gcc/ + * config/aarch64/aarch64-builtins.cc + (aarch64_builtin_vectorized_function): Remove handling of + floor, ceil, trunc, round, nearbyint, sqrt, clz and ctz. + +gcc/testsuite/ + * gcc.target/aarch64/vect_unary_1.c: New test. +--- + gcc/config/aarch64/aarch64-builtins.cc | 32 --- + .../gcc.target/aarch64/vect_unary_1.c | 186 ++++++++++++++++++ + 2 files changed, 186 insertions(+), 32 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_1.c + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 37bb3af48..23a84cd53 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -2653,38 +2653,6 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, + switch (fn) + { + #undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) +- CASE_CFN_FLOOR: +- return AARCH64_FIND_FRINT_VARIANT (floor); +- CASE_CFN_CEIL: +- return AARCH64_FIND_FRINT_VARIANT (ceil); +- CASE_CFN_TRUNC: +- return AARCH64_FIND_FRINT_VARIANT (btrunc); +- CASE_CFN_ROUND: +- return AARCH64_FIND_FRINT_VARIANT (round); +- CASE_CFN_NEARBYINT: +- return AARCH64_FIND_FRINT_VARIANT (nearbyint); +- CASE_CFN_SQRT: +- return AARCH64_FIND_FRINT_VARIANT (sqrt); +-#undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##SImode && in_mode == V##C##N##Imode) +- CASE_CFN_CLZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_clzv4si; +- return NULL_TREE; +- } +- CASE_CFN_CTZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (2, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv2si; +- else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv4si; +- return NULL_TREE; +- } +-#undef AARCH64_CHECK_BUILTIN_MODE + #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) + CASE_CFN_IFLOOR: +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +new file mode 100644 +index 000000000..8516808be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +@@ -0,0 +1,186 @@ ++/* { dg-options "-O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++#define TEST2(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ ++test2_##OUT##_##NAME##_##IN (float dummy, \ ++ IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ ++{ \ ++ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ return x; \ ++} \ ++ ++#define TEST4(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(16))) \ ++test4_##OUT##_##NAME##_##IN (float dummy, \ ++ IN 
__attribute__((vector_size(16))) y) \ ++{ \ ++ OUT __attribute__((vector_size(16))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ x2 = __builtin_##NAME (y2); \ ++ x3 = __builtin_##NAME (y3); \ ++ return x; \ ++} \ ++ ++/* ++** test2_float_truncf_float: ++** frintz v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, truncf, float) ++ ++/* ++** test2_double_trunc_double: ++** frintz v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, trunc, double) ++ ++/* ++** test4_float_truncf_float: ++** frintz v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, truncf, float) ++ ++/* ++** test2_float_roundf_float: ++** frinta v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, roundf, float) ++ ++/* ++** test2_double_round_double: ++** frinta v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, round, double) ++ ++/* ++** test4_float_roundf_float: ++** frinta v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, roundf, float) ++ ++/* ++** test2_float_nearbyintf_float: ++** frinti v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, nearbyintf, float) ++ ++/* ++** test2_double_nearbyint_double: ++** frinti v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, nearbyint, double) ++ ++/* ++** test4_float_nearbyintf_float: ++** frinti v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, nearbyintf, float) ++ ++/* ++** test2_float_floorf_float: ++** frintm v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, floorf, float) ++ ++/* ++** test2_double_floor_double: ++** frintm v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, floor, double) ++ ++/* ++** test4_float_floorf_float: ++** frintm v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, floorf, float) ++ ++/* ++** test2_float_ceilf_float: ++** frintp v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, ceilf, float) ++ ++/* ++** test2_double_ceil_double: ++** frintp v0.2d, v1.2d ++** ret
_service:tar_scm:0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
Added
@@ -0,0 +1,63 @@
+From a1ba437195286af3389ba9f2d43b8cb6c73ba3d8 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Thu, 12 Oct 2023 15:55:58 +0100
+Subject: [PATCH 144/157] [Backport][SME] AArch64: Fix Armv9-a warnings that
+ get emitted whenever an ACLE header is used.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de593b3cffdc9c030c5e697ad9092b1b085dabc4
+
+At the moment, trying to use -march=armv9-a with any ACLE header such as
+arm_neon.h results in rows and rows of warnings saying:
+
+<built-in>: warning: "__ARM_ARCH" redefined
+<built-in>: note: this is the location of the previous definition
+
+This is obviously not useful and happens because the header was defined at
+__ARM_ARCH == 8 and the command line changes it.
+
+The Arm port solves this by undefining the macro during argument processing,
+and we do the same on AArch64 for the majority of macros.  However we define
+this macro using a different helper which requires the manual undef.
+
+Thanks,
+Tamar
+
+gcc/ChangeLog:
+
+        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add undef.
+
+gcc/testsuite/ChangeLog:
+
+        * gcc.target/aarch64/armv9_warning.c: New test.
+---
+ gcc/config/aarch64/aarch64-c.cc                  | 1 +
+ gcc/testsuite/gcc.target/aarch64/armv9_warning.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index 745719d8b..2d2ac42c4 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -129,6 +129,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
+ {
+   aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
+
++  cpp_undef (pfile, "__ARM_ARCH");
+   builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8);
+
+   builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
+diff --git a/gcc/testsuite/gcc.target/aarch64/armv9_warning.c b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+new file mode 100644
+index 000000000..35690d5bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv9-a -Wpedantic -Werror" } */
++
++#include <arm_neon.h>
++
+--
+2.33.0
+
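A quick way to see the effect (my own sketch, assuming an aarch64 compiler): before this change, the include below drowned in '"__ARM_ARCH" redefined' warnings under -march=armv9-a; with the undef in place the macro is defined exactly once, with the expected value:

  /* Compile with -march=armv9-a on aarch64.  */
  #include <arm_neon.h>

  #if __ARM_ARCH != 9
  #error "__ARM_ARCH should be 9 when targeting Armv9-A"
  #endif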
_service:tar_scm:0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
Added
@@ -0,0 +1,184 @@ +From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001 +From: Roger Sayle <roger@nextmovesoftware.com> +Date: Fri, 27 May 2022 08:57:46 +0100 +Subject: PATCH 145/157 BackportSME Canonicalize X&-Y as X*Y in match.pd + when Y is 0,1. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655 + +"For every pessimization, there's an equal and opposite optimization". + +In the review of my original patch for PR middle-end/98865, Richard +Biener pointed out that match.pd shouldn't be transforming X*Y into +X&-Y as the former is considered cheaper by tree-ssa's cost model +(operator count). A corollary of this is that we should instead be +transforming X&-Y into the cheaper X*Y as a preferred canonical form +(especially as RTL expansion now intelligently selects the appropriate +implementation based on the target's costs). + +With this patch we now generate identical code for: +int foo(int x, int y) { return -(x&1) & y; } +int bar(int x, int y) { return (x&1) * y; } + +specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2, +but both use and/mul with -Os. + +One minor wrinkle/improvement is that this patch includes three +additional optimizations (that account for the change in canonical +form) to continue to optimize PR92834 and PR94786. + +2022-05-27 Roger Sayle <roger@nextmovesoftware.com> + +gcc/ChangeLog + * match.pd (match_zero_one_valued_p): New predicate. + (mult @0 @1): Use zero_one_valued_p for optimization to the + expression "bit_and @0 @1". + (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR. + (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform. + (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise. + (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise. + Remove three redundant transforms obsoleted by the three above. + +gcc/testsuite/ChangeLog + * gcc.dg/pr98865.c: New test case. +--- + gcc/match.pd | 86 ++++++++++++++++------------------ + gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++ + 2 files changed, 55 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/pr98865.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index aee58e47b..6d3165bcd 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + || !COMPLEX_FLOAT_TYPE_P (type))) + (negate @0))) + +-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */ +-(simplify +- (mult SSA_NAME@1 SSA_NAME@2) +- (if (INTEGRAL_TYPE_P (type) +- && get_nonzero_bits (@1) == 1 +- && get_nonzero_bits (@2) == 1) +- (bit_and @1 @2))) +- + /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...}, + unless the target has native support for the former but not the latter. */ + (simplify +@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (bit_not (bit_not @0)) + @0) + ++(match zero_one_valued_p ++ @0 ++ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1))) ++(match zero_one_valued_p ++ truth_valued_p@0) ++ ++/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. */ ++(simplify ++ (mult zero_one_valued_p@0 zero_one_valued_p@1) ++ (if (INTEGRAL_TYPE_P (type)) ++ (bit_and @0 @1))) ++ ++/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ ++(simplify ++ (bit_and:c (convert? 
(negate zero_one_valued_p@0)) @1) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE ++ && !TYPE_UNSIGNED (TREE_TYPE (@0))) ++ (mult (convert @0) @1))) ++ + /* Convert ~ (-A) to A - 1. */ + (simplify + (bit_not (convert? (negate @0))) +@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (cmp @0 (minmax:c @0 @1)) + { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } )) + +-/* Undo fancy way of writing max/min or other ?: expressions, +- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a. ++/* Undo fancy ways of writing max/min or other ?: expressions, like ++ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a. + People normally use ?: and that is what we actually try to optimize. */ +-(for cmp (simple_comparison) +- (simplify +- (minus @0 (bit_and:c (minus @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- (simplify +- (plus:c @0 (bit_and:c (minus @1 @0) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- /* Similarly with ^ instead of - though in that case with :c. */ +- (simplify +- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0)))) ++/* Transform A + (B-A)*cmp into cmp ? B : A. */ ++(simplify ++ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A - (A-B)*cmp into cmp ? B : A. */ ++(simplify ++ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A ^ (A^B)*cmp into cmp ? B : A. */ ++(simplify ++ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) + + /* Simplifications of shift and rotates. */ + +diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c +new file mode 100644 +index 000000000..95f727033 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr98865.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++int foo(int x, int y) ++{ ++ return -(x&1) & y; ++} ++ ++int bar(int x, int y) ++{ ++ return (x&1) * y; ++} ++ ++/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */ +-- +2.33.0 +
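The three new transforms exist because the canonicalization changes which branchless max/min spellings reach the middle end; a sketch (function names are mine) of the PR92834-style idioms that should now all gimplify to the same conditional:

  int fancy_max_and (int a, int b) { return a - ((a - b) & -(a < b)); }
  int fancy_max_mul (int a, int b) { return a - (a - b) * (a < b); }
  int plain_max     (int a, int b) { return a < b ? b : a; }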
_service:tar_scm:0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
Added
@@ -0,0 +1,417 @@ +From a8f10b4b73c2624599765edf7ff19d53eca15135 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 12 Dec 2022 15:16:50 +0000 +Subject: PATCH 146/157 BackportSME middle-end: Add new tbranch optab to + add support for bit-test-and-branch operations + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc582d2ef32e2d3723c68d111f4e49607631f34d + +This adds a new test-and-branch optab that can be used to do a conditional test +of a bit and branch. This is similar to the cbranch optab but instead can +test any arbitrary bit inside the register. + +This patch recognizes boolean comparisons and single bit mask tests. + +gcc/ChangeLog: + + * dojump.cc (do_jump): Pass along value. + (do_jump_by_parts_greater_rtx): Likewise. + (do_jump_by_parts_zero_rtx): Likewise. + (do_jump_by_parts_equality_rtx): Likewise. + (do_compare_rtx_and_jump): Likewise. + (do_compare_and_jump): Likewise. + * dojump.h (do_compare_rtx_and_jump): New. + * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check. + (validate_test_and_branch): New. + (emit_cmp_and_jump_insns): Optiobally take a value, and when value is + supplied then check if it's suitable for tbranch. + * optabs.def (tbranch_eq$a4, tbranch_ne$a4): New. + * doc/md.texi (tbranch_@var{op}@var{mode}4): Document it. + * optabs.h (emit_cmp_and_jump_insns): New. + * tree.h (tree_zero_one_valued_p): New. +--- + gcc/doc/md.texi | 7 +++ + gcc/dojump.cc | 52 +++++++++++++++------- + gcc/dojump.h | 4 ++ + gcc/optabs.cc | 114 ++++++++++++++++++++++++++++++++++++++++++++---- + gcc/optabs.def | 2 + + gcc/optabs.h | 4 ++ + gcc/tree.h | 1 + + 7 files changed, 159 insertions(+), 25 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index c0cf0ec64..2193900e7 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -7299,6 +7299,13 @@ case, you can and should make operand 1's predicate reject some operators + in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether + from the machine description. + ++@cindex @code{tbranch_@var{op}@var{mode}3} instruction pattern ++@item @samp{tbranch_@var{op}@var{mode}3} ++Conditional branch instruction combined with a bit test-and-compare ++instruction. Operand 0 is the operand of the comparison. Operand 1 is the bit ++position of Operand 1 to test. Operand 3 is the @code{code_label} to jump to. ++@var{op} is one of @var{eq} or @var{ne}. ++ + @cindex @code{cbranch@var{mode}4} instruction pattern + @item @samp{cbranch@var{mode}4} + Conditional branch instruction combined with a compare instruction. +diff --git a/gcc/dojump.cc b/gcc/dojump.cc +index 0c880d653..604b28537 100644 +--- a/gcc/dojump.cc ++++ b/gcc/dojump.cc +@@ -621,7 +621,7 @@ do_jump (tree exp, rtx_code_label *if_false_label, + } + do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)), + NE, TYPE_UNSIGNED (TREE_TYPE (exp)), +- GET_MODE (temp), NULL_RTX, ++ exp, GET_MODE (temp), NULL_RTX, + if_false_label, if_true_label, prob); + } + +@@ -689,7 +689,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + + /* All but high-order word must be compared as unsigned. */ + do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0), +- word_mode, NULL_RTX, NULL, if_true_label, ++ NULL, word_mode, NULL_RTX, NULL, if_true_label, + prob); + + /* Emit only one comparison for 0. Do not emit the last cond jump. 
*/ +@@ -697,8 +697,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + break; + + /* Consider lower words only if these are equal. */ +- do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode, +- NULL_RTX, NULL, if_false_label, ++ do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL, ++ word_mode, NULL_RTX, NULL, if_false_label, + prob.invert ()); + } + +@@ -757,7 +757,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + if (part != 0) + { +- do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode, ++ do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode, + NULL_RTX, if_false_label, if_true_label, prob); + return; + } +@@ -768,7 +768,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- const0_rtx, EQ, 1, word_mode, NULL_RTX, ++ const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -811,8 +811,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- operand_subword_force (op1, i, mode), +- EQ, 0, word_mode, NULL_RTX, ++ operand_subword_force (op1, i, mode), ++ EQ, 0, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -964,6 +964,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + rtx_code_label *if_false_label, + rtx_code_label *if_true_label, + profile_probability prob) ++{ ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size, ++ if_false_label, if_true_label, prob); ++} ++ ++/* Like do_compare_and_jump but expects the values to compare as two rtx's. ++ The decision as to signed or unsigned comparison must be made by the caller. ++ ++ If MODE is BLKmode, SIZE is an RTX giving the size of the objects being ++ compared. 
*/ ++ ++void ++do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, ++ tree val, machine_mode mode, rtx size, ++ rtx_code_label *if_false_label, ++ rtx_code_label *if_true_label, ++ profile_probability prob) + { + rtx tem; + rtx_code_label *dummy_label = NULL; +@@ -1179,8 +1196,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + else + dest_label = if_false_label; +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, dest_label, NULL, first_prob); ++ ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, dest_label, NULL, ++ first_prob); + } + /* For !and_them we want to split: + if (x) goto t; // prob; +@@ -1194,8 +1213,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + else + { + profile_probability first_prob = prob.split (cprob); +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, NULL, if_true_label, first_prob); ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, NULL, ++ if_true_label, first_prob); + if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump)) + { + /* x != y can be split into x unord y || x ltgt y +@@ -1217,7 +1237,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + } + +- emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, ++ emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val, + if_true_label, prob); + } + +@@ -1291,9 +1311,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code, + op1 = new_op1; + } + +- do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, +- ((mode == BLKmode) +- ? expr_size (treeop0) : NULL_RTX), ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode, ++ ((mode == BLKmode) ++ ? expr_size (treeop0) : NULL_RTX), + if_false_label, if_true_label, prob); + } + +diff --git a/gcc/dojump.h b/gcc/dojump.h +index e379cceb3..d1d79c490 100644 +--- a/gcc/dojump.h ++++ b/gcc/dojump.h +@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label, + extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *, + profile_probability);
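What tbranch buys in practice (my own example; on a target that implements the new optab — AArch64's tbz/tbnz was the motivating case — the expected outcome is a single test-bit-and-branch instruction rather than an and/cmp/branch sequence):

  void g (void);

  void
  maybe_call (unsigned int x)
  {
    if (x & (1u << 5))  /* single-bit mask test, tbranch material */
      g ();
  }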
_service:tar_scm:0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
Added
@@ -0,0 +1,110 @@ +From fe64cc72e6221cf05e40bc868287bd1fcf07479f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 6 Nov 2023 10:49:58 +0000 +Subject: PATCH 147/157 BackportSME explow: Allow dynamic allocations + after vregs + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7a45364dddef399bfb550235166df51108a3142 + +This patch allows allocate_dynamic_stack_space to be called before +or after virtual registers have been instantiated. It uses the +same approach as allocate_stack_local, which already supported this. + +gcc/ + * function.h (get_stack_dynamic_offset): Declare. + * function.cc (get_stack_dynamic_offset): New function, + split out from... + (get_stack_dynamic_offset): ...here. + * explow.cc (allocate_dynamic_stack_space): Handle calls made + after virtual registers have been instantiated. +--- + gcc/explow.cc | 10 +++++++--- + gcc/function.cc | 12 +++++++++++- + gcc/function.h | 1 + + 3 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/gcc/explow.cc b/gcc/explow.cc +index d8aa75ee9..124e8f49e 100644 +--- a/gcc/explow.cc ++++ b/gcc/explow.cc +@@ -1375,12 +1375,16 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + HOST_WIDE_INT stack_usage_size = -1; + rtx_code_label *final_label; + rtx final_target, target; ++ rtx addr = (virtuals_instantiated ++ ? plus_constant (Pmode, stack_pointer_rtx, ++ get_stack_dynamic_offset ()) ++ : virtual_stack_dynamic_rtx); + + /* If we're asking for zero bytes, it doesn't matter what we point + to since we can't dereference it. But return a reasonable + address anyway. */ + if (size == const0_rtx) +- return virtual_stack_dynamic_rtx; ++ return addr; + + /* Otherwise, show we're calling alloca or equivalent. */ + cfun->calls_alloca = 1; +@@ -1532,7 +1536,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + poly_int64 saved_stack_pointer_delta; + + if (!STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + + /* Check stack bounds if necessary. */ + if (crtl->limit_stack) +@@ -1575,7 +1579,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + stack_pointer_delta = saved_stack_pointer_delta; + + if (STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + } + + suppress_reg_args_size = false; +diff --git a/gcc/function.cc b/gcc/function.cc +index f4fc211a0..e62b2a8d6 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -1945,6 +1945,16 @@ instantiate_decls (tree fndecl) + vec_free (cfun->local_decls); + } + ++/* Return the value of STACK_DYNAMIC_OFFSET for the current function. ++ This is done through a function wrapper so that the macro sees a ++ predictable set of included files. */ ++ ++poly_int64 ++get_stack_dynamic_offset () ++{ ++ return STACK_DYNAMIC_OFFSET (current_function_decl); ++} ++ + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +@@ -1956,7 +1966,7 @@ instantiate_virtual_regs (void) + /* Compute the offsets to use for this function. 
*/ + in_arg_offset = FIRST_PARM_OFFSET (current_function_decl); + var_offset = targetm.starting_frame_offset (); +- dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl); ++ dynamic_offset = get_stack_dynamic_offset (); + out_arg_offset = STACK_POINTER_OFFSET; + #ifdef FRAME_POINTER_CFA_OFFSET + cfa_offset = FRAME_POINTER_CFA_OFFSET (current_function_decl); +diff --git a/gcc/function.h b/gcc/function.h +index 4e8131706..a5846465a 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -711,6 +711,7 @@ extern vec<edge> convert_jumps_to_returns (basic_block last_bb, bool simple_p, + extern basic_block emit_return_for_exit (edge exit_fallthru_edge, + bool simple_p); + extern void reposition_prologue_and_epilogue_notes (void); ++extern poly_int64 get_stack_dynamic_offset (); + + /* Returns the name of the current function. */ + extern const char *fndecl_name (tree); +-- +2.33.0 +
_service:tar_scm:0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
Added
@@ -0,0 +1,225 @@ +From 6cc7bcadadbc2521a2db4c02adfe066d805e37ef Mon Sep 17 00:00:00 2001 +From: Giuliano Belinassi <gbelinassi@suse.de> +Date: Fri, 6 May 2022 23:37:52 -0300 +Subject: PATCH 148/157 BackportSME PR105169 Fix references to discarded + sections + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7a3f38a966a52893fb5bae301a1a3d56961358fb + +When -fpatchable-function-entry= is enabled, certain C++ codes fails to +link because of generated references to discarded sections in +__patchable_function_entry section. This commit fixes this problem by +puting those references in a COMDAT section. + +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + +gcc/ChangeLog + PR c++/105169 + * targhooks.cc (default_print_patchable_function_entry_1): Handle COMDAT case. + * varasm.cc (switch_to_comdat_section): New + (handle_vtv_comdat_section): Call switch_to_comdat_section. + * varasm.h: Declare switch_to_comdat_section. + +gcc/testsuite/ChangeLog +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + + PR c++/105169 + * g++.dg/modules/pr105169.h: New file. + * g++.dg/modules/pr105169_a.C: New test. + * g++.dg/modules/pr105169_b.C: New file. +--- + gcc/targhooks.cc | 8 ++++-- + gcc/testsuite/g++.dg/modules/pr105169.h | 22 +++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +++++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++++++++ + gcc/varasm.cc | 33 ++++++++++++++--------- + gcc/varasm.h | 2 ++ + 6 files changed, 87 insertions(+), 15 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C + +diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc +index c88afa5db..175a0e18a 100644 +--- a/gcc/targhooks.cc ++++ b/gcc/targhooks.cc +@@ -2019,8 +2019,12 @@ default_print_patchable_function_entry_1 (FILE *file, + patch_area_number++; + ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number); + +- switch_to_section (get_section ("__patchable_function_entries", +- flags, current_function_decl)); ++ section *sect = get_section ("__patchable_function_entries", ++ flags, current_function_decl); ++ if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl)) ++ switch_to_comdat_section (sect, current_function_decl); ++ else ++ switch_to_section (sect); + assemble_align (POINTER_SIZE); + fputs (asm_op, file); + assemble_name_raw (file, buf); +diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h b/gcc/testsuite/g++.dg/modules/pr105169.h +new file mode 100644 +index 000000000..a7e762705 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169.h +@@ -0,0 +1,22 @@ ++class IPXAddressClass ++{ ++public: ++ IPXAddressClass(void); ++}; ++ ++class WinsockInterfaceClass ++{ ++ ++public: ++ WinsockInterfaceClass(void); ++ ++ virtual void Set_Broadcast_Address(void*){}; ++ ++ virtual int Get_Protocol(void) ++ { ++ return 0; ++ }; ++ ++protected: ++}; ++ +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C b/gcc/testsuite/g++.dg/modules/pr105169_a.C +new file mode 100644 +index 000000000..66dc4b790 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C +@@ -0,0 +1,25 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. 
*/ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass* PacketTransport; ++ ++IPXAddressClass::IPXAddressClass(void) ++{ ++} ++ ++int function() ++{ ++ return PacketTransport->Get_Protocol(); ++} ++ ++int main() ++{ ++ IPXAddressClass ipxaddr; ++ return 0; ++} +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C b/gcc/testsuite/g++.dg/modules/pr105169_b.C +new file mode 100644 +index 000000000..5f8b00dfe +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C +@@ -0,0 +1,12 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. */ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass::WinsockInterfaceClass(void) ++{ ++} +diff --git a/gcc/varasm.cc b/gcc/varasm.cc +index 3f69b47a7..bae935694 100644 +--- a/gcc/varasm.cc ++++ b/gcc/varasm.cc +@@ -8459,25 +8459,21 @@ default_asm_output_ident_directive (const char *ident_str) + fprintf (asm_out_file, "%s\"%s\"\n", ident_asm_op, ident_str); + } + +- +-/* This function ensures that vtable_map variables are not only +- in the comdat section, but that each variable has its own unique +- comdat name. Without this the variables end up in the same section +- with a single comdat name. +- ++/* Switch to a COMDAT section with COMDAT name of decl. ++ + FIXME: resolve_unique_section needs to deal better with + decls with both DECL_SECTION_NAME and DECL_ONE_ONLY. Once + that is fixed, this if-else statement can be replaced with + a single call to "switch_to_section (sect)". */ + +-static void +-handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) ++void ++switch_to_comdat_section (section *sect, tree decl) + { + #if defined (OBJECT_FORMAT_ELF) + targetm.asm_out.named_section (sect->named.name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + #else + /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here. +@@ -8492,18 +8488,18 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + { + char *name; + +- if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE) ++ if (TREE_CODE (decl) == IDENTIFIER_NODE) + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_NAME (decl)), NULL)); ++ IDENTIFIER_POINTER (decl), NULL)); + else + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))), ++ IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)), + NULL)); + + targetm.asm_out.named_section (name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + } + else +@@ -8511,4 +8507,15 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + #endif + } +
_service:tar_scm:0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
Added
@@ -0,0 +1,53 @@
+From a3b4a0ac472415a52ce836e8997f7a69a06fad33 Mon Sep 17 00:00:00 2001
+From: Michael Collison <collison@rivosinc.com>
+Date: Sat, 6 May 2023 12:37:50 -0600
+Subject: [PATCH 149/157] [Backport][SME] RISC-V: autovec: Verify that
+ GET_MODE_NUNITS is a multiple of 2.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9
+
+While working on autovectorizing for the RISCV port I encountered an issue
+where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is
+evenly divisible by two.  The RISC-V target has vector modes (e.g. VNx1DImode),
+where GET_MODE_NUNITS is equal to one.
+
+Tested on RISCV and x86_64-linux-gnu. Okay?
+
+gcc/
+        * tree-vect-slp.cc (can_duplicate_and_interleave_p):
+        Check that GET_MODE_NUNITS is a multiple of 2.
+---
+ gcc/tree-vect-slp.cc | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index 6cbf8085f..d02f0ce37 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -401,10 +401,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+             (GET_MODE_BITSIZE (int_mode), 1);
+           tree vector_type
+             = get_vectype_for_scalar_type (vinfo, int_type, count);
++          poly_int64 half_nelts;
+           if (vector_type
+               && VECTOR_MODE_P (TYPE_MODE (vector_type))
+               && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+-                           GET_MODE_SIZE (base_vector_mode)))
++                           GET_MODE_SIZE (base_vector_mode))
++              && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
++                             2, &half_nelts))
+             {
+               /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+                  together into elements of type INT_TYPE and using the result
+@@ -412,7 +415,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+               poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
+               vec_perm_builder sel1 (nelts, 2, 3);
+               vec_perm_builder sel2 (nelts, 2, 3);
+-              poly_int64 half_nelts = exact_div (nelts, 2);
++
+               for (unsigned int i = 0; i < 3; ++i)
+                 {
+                   sel1.quick_push (i);
+--
+2.33.0
+
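The reasoning in miniature (a sketch mirroring the hunk above, using the same GCC-internal API): for a mode like VNx1DImode the unit count is 1, so the old exact_div (nelts, 2) would assert, whereas multiple_p merely tests divisibility and supplies the quotient on success:

  poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
  poly_int64 half_nelts;
  if (multiple_p (nelts, 2, &half_nelts))
    {
      /* nelts is even; half_nelts now holds nelts / 2.  */
    }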
_service:tar_scm:0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
Added
@@ -0,0 +1,42 @@
+From b0ca9a6eb1406a60eec566cf302790bee89879af Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Tue, 18 Apr 2023 16:58:26 +0200
+Subject: [PATCH 150/157] [Backport][SME] Add operator* to gimple_stmt_iterator
+ and gphi_iterator
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44
+
+This allows STL style iterator dereference.  It's the same
+as gsi_stmt () or .phi ().
+
+        * gimple-iterator.h (gimple_stmt_iterator::operator*): Add.
+        (gphi_iterator::operator*): Likewise.
+---
+ gcc/gimple-iterator.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
+index 216ebee24..5d281e4f7 100644
+--- a/gcc/gimple-iterator.h
++++ b/gcc/gimple-iterator.h
+@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
+
+ struct gimple_stmt_iterator
+ {
++  gimple *operator * () const { return ptr; }
++
+   /* Sequence node holding the current statement.  */
+   gimple_seq_node ptr;
+
+@@ -38,6 +40,8 @@ struct gimple_stmt_iterator
+ /* Iterator over GIMPLE_PHI statements.  */
+ struct gphi_iterator : public gimple_stmt_iterator
+ {
++  gphi *operator * () const { return as_a <gphi *> (ptr); }
++
+   gphi *phi () const
+   {
+     return as_a <gphi *> (ptr);
+--
+2.33.0
+
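A usage sketch (mine, assuming the usual gimple-iterator.h and gimple-pretty-print.h includes): the new operator lets iterator loops dereference directly instead of spelling out gsi_stmt:

  static void
  dump_block_stmts (basic_block bb)
  {
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
         !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = *gsi;  /* same statement as gsi_stmt (gsi) */
        print_gimple_stmt (stderr, stmt, 0, TDF_NONE);
      }
  }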
_service:tar_scm:0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
Added
@@ -0,0 +1,75 @@
+From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 10 Nov 2023 12:39:11 +0100
+Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and
+ loop mask/len
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
+
+The following fixes the issue that when SLP stmts are internal defs
+but appear invariant because they end up only using invariant defs
+then they get scheduled outside of the loop.  This nice optimization
+breaks down when loop masks or lens are applied since those are not
+explicitly tracked as dependences.  The following makes sure to never
+schedule internal defs outside of the vectorized loop when the
+loop uses masks/lens.
+
+        PR tree-optimization/110221
+        * tree-vect-slp.cc (vect_schedule_slp_node): When loop
+        masking / len is applied make sure to not schedule
+        internal defs outside of the loop.
+
+        * gfortran.dg/pr110221.f: New testcase.
+---
+ gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
+ gcc/tree-vect-slp.cc                 | 10 ++++++++++
+ 2 files changed, 27 insertions(+)
+ create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
+
+diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
+new file mode 100644
+index 000000000..8b5738431
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/pr110221.f
+@@ -0,0 +1,17 @@
++C PR middle-end/68146
++C { dg-do compile }
++C { dg-options "-O2 -w" }
++C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
++      SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
++      IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
++      IMPLICIT COMPLEX*16 (C,Z)
++      DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
++      N=INT(V)
++      CALL GAMMA2(VG,GA)
++      DO 65 K=1,N
++         CBY(K)=CYY
++65    CONTINUE
++      CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
++      DO 70 K=1,N
++70    CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
++      END
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d02f0ce37..e3e246977 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo,
+   /* Emit other stmts after the children vectorized defs which is
+      earliest possible.  */
+   gimple *last_stmt = NULL;
++  if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
++    if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
++        || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
++      {
++        /* But avoid scheduling internal defs outside of the loop when
++           we might have only implicitly tracked loop mask/len defs.  */
++        gimple_stmt_iterator si
++          = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
++        last_stmt = *si;
++      }
+   bool seen_vector_def = false;
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+--
+2.33.0
+
_service:tar_scm:0251-SME-Adapt-some-testsuites.patch
Added
@@ -0,0 +1,116 @@ +From b60c29e6658c8620f1116ce5a38a6eb823af64e6 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Thu, 7 Mar 2024 10:22:39 +0800 +Subject: PATCH 152/157 SME Adapt some testsuites + +gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: + GCC 12.3.0 do not support -std=c23 and -std=gnu23 + +gcc.target/aarch64/sme/streaming_mode_2.c: + It's a warning in GCC 12.3.0 + +gcc.dg/c2x-attr-syntax-6.c: +gcc.dg/c2x-attr-syntax-7.c: + GCC 12.3.0 do not support C2x (...) function prototypes and + C2x noreturn attribute + +gcc.target/aarch64/sme/za_state_4.c: + Seems need a ldp/stp optimization, not a functionality issue +--- + gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 2 -- + gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 2 -- + .../gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp | 2 -- + .../gcc.target/aarch64/sme/streaming_mode_2.c | 12 ++++++------ + gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c | 1 + + 5 files changed, 7 insertions(+), 12 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +index 9e5f65ce4..2385b25fe 100644 +--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5; + typedef int __extension__ gnu BAR BAR vector_size (4) g6; + typedef int __extension__ gnu :/**/: vector_size (4) g7; + typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8; +-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10; + typedef int __extension__ g11; + typedef int __extension__, g12; + typedef int __extension__, ,,,, ,, , g13; + __extension__ deprecated int g14 (); + __extension__ nodiscard int g15 (); +-__extension__ noreturn void g16 (); + + int + cases (int x) +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c +index 702f733b1..5bbdba665 100644 +--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c +@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5; + typedef int __extension__ gnu BAR BAR vector_size (4) g6; + typedef int __extension__ gnu :/**/: vector_size (4) g7; + typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8; +-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10; + typedef int __extension__ g11; + typedef int __extension__, g12; + typedef int __extension__, ,,,, ,, , g13; + __extension__ deprecated int g14 (); + __extension__ nodiscard int g15 (); +-__extension__ noreturn void g16 (); + + int + cases (int x) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +index e2d002f26..a0a4fe4f7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp ++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +@@ -52,9 +52,7 @@ set-torture-options { + "-std=c90 -O0 -g" + "-std=c99 -Og -g" + "-std=c11 -Os -g" +- "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" + "-std=gnu90 -O3 -g" +- "-std=gnu23 -Ofast -g" + } { + "-DTEST_FULL" + "-DTEST_OVERLOADS" +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c +index e8be0f821..1e328c817 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c +@@ -12,14 +12,14 @@ void + f () + { + sc_fn_ptr = sc_fn; 
+- sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } +- sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } ++ sc_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } ++ sc_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } + +- s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } ++ s_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } + s_fn_ptr = s_fn; +- s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } ++ s_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } + +- ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } +- ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } ++ ns_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } ++ ns_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } + ns_fn_ptr = ns_fn; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +index cec0abf0e..a764a7c89 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +@@ -105,6 +105,7 @@ __arm_new("za") void test5() + ** mul (x0-9+), \1, \1 + ** sub sp, sp, \2 + ** mov (x0-9+), sp ++** add ^\n+ + ** stp \3, \1, \x29, #?16\ + ** add (x0-9+), x29, #?16 + ** msr tpidr2_el0, \4 +-- +2.33.0 +
_service:tar_scm:0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch
Added
@@ -0,0 +1,43 @@
+From ce53aec1f43f79c093db662a2e8e3062462757b4 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Thu, 22 Aug 2024 16:35:28 +0800
+Subject: [PATCH 153/157] [SME] Fix error by backported patches and IPA
+ prefetch
+
+Fix
+gtype-desc.cc: In function 'void gt_pch_p_30vec_cgraph_node__va_gc_atomic_(void*, void*, gt_pointer_operator, void*)':
+gtype-desc.cc:11032:35: error: call of overloaded 'gt_pch_nx(vec<cgraph_node*, va_gc_atomic>*, void (*&)(void*, void*, void*), void*&)' is ambiguous
+11032 |       gt_pch_nx (&((*x)), op, cookie);
+      |                                   ^
+In file included from ../../gcc/hash-table.h:248,
+                 from ../../gcc/coretypes.h:486,
+                 from gtype-desc.cc:23:
+../../gcc/vec.h:1395:1: note: candidate: 'void gt_pch_nx(vec<T*, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1395 | gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1403:1: note: candidate: 'void gt_pch_nx(vec<T, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1403 | gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1412:1: note: candidate: 'void gt_pch_nx(vec<T, va_gc_atomic, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1412 | gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *)
+      | ^~~~~~~~~
+---
+ gcc/cgraph.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/cgraph.h b/gcc/cgraph.h
+index b84ff2f98..2332539e5 100644
+--- a/gcc/cgraph.h
++++ b/gcc/cgraph.h
+@@ -1660,7 +1660,7 @@ public:
+   /* ECF flags determined from the caller.  */
+   int ecf_flags;
+   /* Vector of potential call targets determined by analysis.  */
+-  vec<cgraph_node *, va_gc_atomic> *targets;
++  vec<cgraph_node *, va_gc> *targets;
+
+   /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES.  */
+   unsigned num_speculative_call_targets : 16;
+--
+2.33.0
+
_service:tar_scm:0253-aarch64-Fix-return-register-handling-in-untyped_call.patch
Added
@@ -0,0 +1,66 @@
+From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Wed, 20 Sep 2023 11:13:19 +0100
+Subject: [PATCH] aarch64: Fix return register handling in untyped_call
+
+While working on another patch, I hit a problem with the aarch64
+expansion of untyped_call.  The expander emits the usual:
+
+  (set (mem ...) (reg resN))
+
+instructions to store the result registers to memory, but it didn't
+say in RTL where those resN results came from.  This eventually led
+to a failure of gcc.dg/torture/stackalign/builtin-return-2.c,
+via regrename.
+
+This patch turns the untyped call from a plain call to a call_value,
+to represent that the call returns (or might return) a useful value.
+The patch also uses a PARALLEL return rtx to represent all the possible
+return registers.
+
+gcc/
+        * config/aarch64/aarch64.md (untyped_call): Emit a call_value
+        rather than a call.  List each possible destination register
+        in the call pattern.
+
+(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f)
+---
+ gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index b9b3ba4f9164..cd568f3daa0f 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1159,9 +1159,27 @@
+ {
+   int i;
+
++  /* Generate a PARALLEL that contains all of the register results.
++     The offsets are somewhat arbitrary, since we don't know the
++     actual return type.  The main thing we need to avoid is having
++     overlapping byte ranges, since those might give the impression
++     that two registers are known to have data in common.  */
++  rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0));
++  poly_int64 offset = 0;
++  for (i = 0; i < XVECLEN (operands[2], 0); i++)
++    {
++      rtx reg = SET_SRC (XVECEXP (operands[2], 0, i));
++      gcc_assert (REG_P (reg));
++      rtx offset_rtx = gen_int_mode (offset, Pmode);
++      rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx);
++      RTVEC_ELT (rets, i) = piece;
++      offset += GET_MODE_SIZE (GET_MODE (reg));
++    }
++  rtx ret = gen_rtx_PARALLEL (VOIDmode, rets);
++
+   /* Untyped calls always use the default ABI.  It's only possible to use
+      ABI variants if we know the type of the target function.  */
+-  emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
++  emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx));
+
+   for (i = 0; i < XVECLEN (operands[2], 0); i++)
+     {
+--
+2.43.5
+
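untyped_call is what __builtin_apply expands to, so a forwarder along these lines (my sketch, modeled loosely on the builtin-return torture tests; the 64-byte argument-block size is an ample guess) exercises the path whose result registers this patch makes visible in RTL:

  double target_fn (int a, double b) { return a + b; }

  double
  forwarder (int a, double b)
  {
    void *args = __builtin_apply_args ();
    void *ret = __builtin_apply ((void (*) ()) target_fn, args, 64);
    __builtin_return (ret);
  }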
View file
_service:tar_scm:0254-aarch64-Fix-loose-ldpstp-check.patch
Added
@@ -0,0 +1,119 @@ +From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 20 Sep 2023 11:13:20 +0100 +Subject: PATCH aarch64: Fix loose ldpstp check PR111411 + +aarch64_operands_ok_for_ldpstp contained the code: + + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. */ + if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) + && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) + return false; + +But the requirement isn't just that one of the accesses must be a +valid mempair operand. It's that the lower access must be, since +that's the access that will be used for the instruction operand. + +gcc/ + PR target/111411 + * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require + the lower memory access to a mem-pair operand. + +gcc/testsuite/ + PR target/111411 + * gcc.dg/rtl/aarch64/pr111411.c: New test. + +(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) +--- + gcc/config/aarch64/aarch64.cc | 8 ++- + gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ + 2 files changed, 60 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 96c3f48fdc49..a979accd90a9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, + gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), + GET_MODE_SIZE (GET_MODE (mem_2)))); + +- /* One of the memory accesses must be a mempair operand. +- If it is not the first one, they need to be swapped by the +- peephole. */ +- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) +- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) ++ /* The lower memory access must be a mem-pair operand. */ ++ rtx lower_mem = reversed ? 
mem_2 : mem_1; ++ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) + return false; + + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +new file mode 100644 +index 000000000000..ad07e9c6c893 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +@@ -0,0 +1,57 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ ++ ++extern int data; ++ ++void __RTL (startwith ("ira")) foo (void *ptr) ++{ ++ (function "foo" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} ++ ++void __RTL (startwith ("ira")) bar (void *ptr) ++{ ++ (function "bar" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} +-- +2.43.5 +
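In C terms, the peephole fires on adjacent same-size accesses like the pair below, a hypothetical analogue of the RTL test's &data+508 and &data+512 stores. The fix ensures that the lower-addressed access, whichever operand order the peephole happens to see, is the one checked against stp's addressing range:

/* Illustrative C analogue (not the testcase itself): two adjacent
   32-bit stores that the stp peephole may fuse.  p + 508 is the lower
   address, so it is the one that must be a valid mem-pair operand.  */
void zero_two_words (char *p)
{
  *(int *) (p + 508) = 0;
  *(int *) (p + 512) = 0;
}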
View file
_service:tar_scm:0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
Added
@@ -0,0 +1,135 @@ +From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Wed, 10 May 2023 15:16:58 +0800 +Subject: PATCH 01/28 x86: Add a new option -mdaz-ftz to enable FTZ and DAZ + flags in MXCSR. + + if (mdaz-ftz) + link crtfastmath.o + else if ((Ofast || ffast-math || funsafe-math-optimizations) + && !mno-daz-ftz) + link crtfastmath.o + else + Don't link crtfastmath.o + +gcc/ChangeLog: + + * config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o + whenever -mdaz-ftz is specified. Don't link crtfastmath.o + when -mno-daz-ftz is specified. + * config/i386/darwin.h (ENDFILE_SPEC): Ditto. + * config/i386/gnu-user-common.h + (GNU_USER_TARGET_MATHFILE_SPEC): Ditto. + * config/i386/mingw32.h (ENDFILE_SPEC): Ditto. + * config/i386/i386.opt (mdaz-ftz): New option. + * doc/invoke.texi (x86 options): Document mftz-daz. +--- + gcc/config/i386/cygwin.h | 2 +- + gcc/config/i386/darwin.h | 4 ++-- + gcc/config/i386/gnu-user-common.h | 2 +- + gcc/config/i386/i386.opt | 4 ++++ + gcc/config/i386/mingw32.h | 2 +- + gcc/doc/invoke.texi | 11 ++++++++++- + 6 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h +index d06eda369..5412c5d44 100644 +--- a/gcc/config/i386/cygwin.h ++++ b/gcc/config/i386/cygwin.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h +index a55f6b2b8..2f773924d 100644 +--- a/gcc/config/i386/darwin.h ++++ b/gcc/config/i386/darwin.h +@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3. If not see + "%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} " + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++#define ENDFILE_SPEC ++\ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" TM_DESTRUCTOR +diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h +index 23b54c5be..3d2a33f17 100644 +--- a/gcc/config/i386/gnu-user-common.h ++++ b/gcc/config/i386/gnu-user-common.h +@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see + + /* Similar to standard GNU userspace, but adding -ffast-math support. */ + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index fc1b944ac..498fb454d 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -420,6 +420,10 @@ mpc80 + Target RejectNegative + Set 80387 floating-point precision to 80-bit. + ++mdaz-ftz ++Target ++Set the FTZ and DAZ Flags. ++ + mpreferred-stack-boundary= + Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg) + Attempt to keep stack aligned to this power of 2. 
+diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h +index d3ca0cd02..ddbe6a405 100644 +--- a/gcc/config/i386/mingw32.h ++++ b/gcc/config/i386/mingw32.h +@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2b376e0e9..3a48655e5 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options. + -m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol + -mregparm=@var{num} -msseregparm @gol + -mveclibabi=@var{type} -mvect8-ret-in-mem @gol +--mpc32 -mpc64 -mpc80 -mstackrealign @gol ++-mpc32 -mpc64 -mpc80 -mdaz-ftz -mstackrealign @gol + -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol + -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol + -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol +@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant + loss of accuracy, typically through so-called ``catastrophic cancellation'', + when this option is used to set the precision to less than extended precision. + ++@item -mdaz-ftz ++@opindex mdaz-ftz ++ ++The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register ++are used to control floating-point calculations.SSE and AVX instructions ++including scalar and vector instructions could benefit from enabling the FTZ ++and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags ++when @option{-mno-daz-ftz} is specified. ++ + @item -mstackrealign + @opindex mstackrealign + Realign the stack at entry. On the x86, the @option{-mstackrealign} +-- +2.31.1 +
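One way to observe the new option (a sketch, not part of the patch): crtfastmath.o sets FTZ (MXCSR bit 15) and DAZ (bit 6) before main runs, so a binary built with -mdaz-ftz but without any fast-math option should report both bits set.

/* Build: gcc -O2 -mdaz-ftz check-mxcsr.c   (file name hypothetical)
   With -mdaz-ftz, crtfastmath.o is linked and both bits read as 1;
   with -Ofast -mno-daz-ftz it is not linked and both read as 0.  */
#include <xmmintrin.h>
#include <stdio.h>

int main (void)
{
  unsigned int csr = _mm_getcsr ();
  printf ("FTZ=%u DAZ=%u\n", (csr >> 15) & 1, (csr >> 6) & 1);
  return 0;
}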
View file
_service:tar_scm:0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
Added
@@ -0,0 +1,65 @@ +From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 5 Jun 2023 12:38:41 +0800 +Subject: PATCH 02/28 Explicitly view_convert_expr mask to signed type when + folding pblendvb builtins. + +Since mask < 0 will be always false for vector char when +-funsigned-char, but vpblendvb needs to check the most significant +bit. The patch explicitly VCE to vector signed char. + +gcc/ChangeLog: + + PR target/110108 + * config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly + view_convert_expr mask to signed type when folding pblendvb + builtins. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110108-2.c: New test. +--- + gcc/config/i386/i386.cc | 4 +++- + gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 462dce10e..479fc6010 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) + tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode + ? intSI_type_node : intDI_type_node; + type = get_same_sized_vectype (itype, type); +- arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + } ++ else ++ type = signed_type_for (type); ++ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + tree zero_vec = build_zero_cst (type); + tree cmp_type = truth_type_for (type); + tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec); +diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c +new file mode 100644 +index 000000000..2d1d2fd49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx2 -O2 -funsigned-char" } */ ++/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ ++ ++#include <immintrin.h> ++__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) { ++ __m128i Result = _mm_blendv_epi8(X0, X1, X2); ++ return Result; ++} ++ ++__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) { ++ __m256i Result = _mm256_blendv_epi8(X0, X1, X2); ++ return Result; ++} +-- +2.31.1 +
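The underlying issue, sketched with generic vector extensions (illustrative, not the testcase): with -funsigned-char a plain char element can never be negative, so the mask < 0 comparison the fold emits would become constant-false unless the mask is first view-converted to a signed element type.

/* Sketch: vpblendvb selects on each byte's most significant bit.  If
   'mask' keeps an unsigned element type, mask < 0 folds to all-zeros;
   reinterpreting it as signed char first preserves the MSB test.  */
typedef unsigned char v16uq __attribute__ ((vector_size (16)));
typedef signed char v16qs __attribute__ ((vector_size (16)));

v16uq blend (v16uq a, v16uq b, v16uq mask)
{
  v16qs smask = (v16qs) mask;   /* the VIEW_CONVERT_EXPR of the fix */
  return (smask < 0) ? b : a;   /* bytes with the MSB set pick b */
}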
View file
_service:tar_scm:0257-Make-option-mvzeroupper-independent-of-optimization-.patch
Added
@@ -0,0 +1,138 @@ +From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 09:50:25 +0800 +Subject: PATCH 03/28 Make option mvzeroupper independent of optimization + level. + +pass_insert_vzeroupper is under condition + +TARGET_AVX && TARGET_VZEROUPPER +&& flag_expensive_optimizations && !optimize_size + +But the document of mvzeroupper doesn't mention the insertion +required -O2 and above, it may confuse users when they explicitly +use -Os -mvzeroupper. + +------------ +mvzeroupper +Target Mask(VZEROUPPER) Save +Generate vzeroupper instruction before a transfer of control flow out of +the function. +------------ + +The patch moves flag_expensive_optimizations && !optimize_size to +ix86_option_override_internal. It makes -mvzeroupper independent of +optimization level, but still keeps the behavior of architecture +tuning(emit_vzeroupper) unchanged. + +gcc/ChangeLog: + + * config/i386/i386-features.cc (pass_insert_vzeroupper:gate): + Move flag_expensive_optimizations && !optimize_size to .. + * config/i386/i386-options.cc (ix86_option_override_internal): + .. this, it makes -mvzeroupper independent of optimization + level, but still keeps the behavior of architecture + tuning(emit_vzeroupper) unchanged. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx-vzeroupper-29.c: New testcase. + * gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase. + * gcc.target/i386/avx-vzeroupper-7.c: Ditto. + * gcc.target/i386/avx-vzeroupper-9.c: Ditto. +--- + gcc/config/i386/i386-features.cc | 3 +-- + gcc/config/i386/i386-options.cc | 4 +++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++ + gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++- + 6 files changed, 24 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c + +diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc +index 6fe41c3c2..6a2444eb6 100644 +--- a/gcc/config/i386/i386-features.cc ++++ b/gcc/config/i386/i386-features.cc +@@ -1875,8 +1875,7 @@ public: + /* opt_pass methods: */ + virtual bool gate (function *) + { +- return TARGET_AVX && TARGET_VZEROUPPER +- && flag_expensive_optimizations && !optimize_size; ++ return TARGET_AVX && TARGET_VZEROUPPER; + } + + virtual unsigned int execute (function *) +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index ff44ad4e0..74e969b68 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p, + sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); + + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) +- && TARGET_EMIT_VZEROUPPER) ++ && TARGET_EMIT_VZEROUPPER ++ && flag_expensive_optimizations ++ && !optimize_size) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!(opts_set->x_target_flags & MASK_STV)) + opts->x_target_flags |= MASK_STV; +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +index e694d4048..5a40e8783 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +@@ -16,5 +16,6 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times 
"avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ + /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +new file mode 100644 +index 000000000..4af637757 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */ ++ ++#include <immintrin.h> ++ ++extern __m256 x, y; ++ ++void ++foo () ++{ ++ x = y; ++} ++ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +index ab6d68779..75fe58897 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +@@ -12,4 +12,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +index 974e1626a..fa0a6dfca 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +@@ -15,4 +15,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ +-- +2.31.1 +
View file
_service:tar_scm:0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
Added
@@ -0,0 +1,68 @@ +From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Sun, 25 Jun 2023 09:50:21 +0800 +Subject: PATCH 04/28 i386: Sync tune_string with arch_string for target + attribute + +arch=* + +For function with target attribute arch=*, current logic will set its +tune to -mtune from command line so all target_clones will get same +tuning flags which would affect the performance for each clone. Override +tune with arch if tune was not explicitly specified to get proper tuning +flags for target_clones. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): + Override tune_string with arch_string if tune_string is not + explicitly specified. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/mvc17.c: New test. + +(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8) +--- + gcc/config/i386/i386-options.cc | 6 +++++- + gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 74e969b68..fb2ed942f 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, + if (option_stringsIX86_FUNCTION_SPECIFIC_TUNE) + opts->x_ix86_tune_string + = ggc_strdup (option_stringsIX86_FUNCTION_SPECIFIC_TUNE); +- else if (orig_tune_defaulted) ++ /* If we have explicit arch string and no tune string specified, set ++ tune_string to NULL and later it will be overriden by arch_string ++ so target clones can get proper optimization. */ ++ else if (option_stringsIX86_FUNCTION_SPECIFIC_ARCH ++ || orig_tune_defaulted) + opts->x_ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ +diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c +new file mode 100644 +index 000000000..8b83c1aec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/mvc17.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-ifunc "" } */ ++/* { dg-options "-O2 -march=x86-64" } */ ++/* { dg-final { scan-assembler-times "rep mov" 1 } } */ ++ ++__attribute__((target_clones("default","arch=icelake-server"))) ++void ++foo (char *a, char *b, int size) ++{ ++ __builtin_memcpy (a, b, size & 0x7F); ++} +-- +2.31.1 +
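The same logic applies to a plain target attribute, not just target_clones (a sketch): before the fix the variant below was tuned according to the command-line -mtune, typically generic; with it, tuning follows arch=icelake-server unless -mtune is given explicitly.

/* Sketch: this function is now both compiled and tuned for
   icelake-server, so the memcpy expansion can use the tuned strategy
   the new mvc17.c test checks for.  */
__attribute__((target ("arch=icelake-server")))
void fast_copy (char *a, const char *b, int n)
{
  __builtin_memcpy (a, b, n & 0x7F);
}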
View file
_service:tar_scm:0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
Added
@@ -0,0 +1,111 @@ +From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Tue, 20 Jun 2023 15:41:00 +0800 +Subject: PATCH 05/28 Refine maskloadmn pattern with UNSPEC_MASKLOAD. + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent +it to be transformed to vpblendd. + +gcc/ChangeLog: + + PR target/110309 + * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>): + Refine pattern with UNSPEC_MASKLOAD. + (maskload<mode><avx512fmaskmodelower>): Ditto. + (*<avx512>_load<mode>_mask): Extend mode iterator to + VI12HF_AVX512VL. + (*<avx512>_load<mode>): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110309.c: New test. +--- + gcc/config/i386/sse.md | 32 +++++++++++++----------- + gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++ + 2 files changed, 28 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index eb767e56c..b30e96cb1 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1411,12 +1411,12 @@ + }) + + (define_insn "*<avx512>_load<mode>_mask" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (vec_merge:VI12_AVX512VL +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD) +- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") ++ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk"))) + "TARGET_AVX512BW" + "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" +@@ -1425,9 +1425,9 @@ + (set_attr "mode" "<sseinsnmode>")) + + (define_insn_and_split "*<avx512>_load<mode>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD)) + "TARGET_AVX512BW" + "#" +@@ -25973,17 +25973,21 @@ + "TARGET_AVX") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "register_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "memory_operand") ++ (set (match_operand:V48_AVX512VL 0 "register_operand") ++ (vec_merge:V48_AVX512VL ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512F") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512BW") +diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c +new file mode 100644 +index 
000000000..f6e9e9c3c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr110309.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
++/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
++
++
++void foo (int * __restrict a, int *b)
++{
++  for (int i = 0; i < 6; ++i)
++    a[i] = b[i] + 42;
++}
+--
+2.31.1
+
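The hazard in intrinsic terms (a sketch, not the testcase): a masked load suppresses faults on inactive lanes, while a full-width load followed by vpblendd reads every lane, so the rewrite is only safe when all bytes are known accessible. The UNSPEC_MASKLOAD wrapper keeps the combiner from making that rewrite.

/* Compile with -mavx512f -mavx512vl.  Only b[0..5] need be
   accessible: the masked load never touches b[6..7], whereas a plain
   256-bit load plus blend would.  */
#include <immintrin.h>

__m256i safe_load6 (const int *b)
{
  __mmask8 m = 0x3f;                       /* low six lanes only */
  return _mm256_maskz_loadu_epi32 (m, b);  /* may read b[0..5] only */
}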
View file
_service:tar_scm:0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
Added
@@ -0,0 +1,126 @@ +From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 21:07:09 +0800 +Subject: PATCH 06/28 Refine maskstore patterns with UNSPEC_MASKMOV. + +Similar like r14-2070-gc79476da46728e + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent +it to be transformed to any other whole memory access instructions. + +gcc/ChangeLog: + + PR rtl-optimization/110237 + * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with + UNSPEC_MASKMOV. + (maskstore<mode><avx512fmaskmodelower): Ditto. + (*<avx512>_store<mode>_mask): New define_insn, it's renamed + from original <avx512>_store<mode>_mask. +--- + gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 57 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index b30e96cb1..3af159896 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1554,7 +1554,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") + (vec_merge:V48_AVX512VL + (match_operand:V48_AVX512VL 1 "register_operand" "v") +@@ -1582,7 +1582,7 @@ + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") + (vec_merge:VI12HF_AVX512VL + (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") +@@ -26002,21 +26002,66 @@ + "TARGET_AVX") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "memory_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:V48_AVX512VL 0 "memory_operand") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512F") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "memory_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512BW") + ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512F" ++{ ++ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++ else ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovdqu<ssescalarsize>\t{%1, 
%0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++} ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512BW" ++ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ + (define_expand "cbranch<mode>4" + (set (reg:CC FLAGS_REG) + (compare:CC (match_operand:VI48_AVX 1 "register_operand") +-- +2.31.1 +
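The store side has the same shape (a sketch, not part of the patch): a real masked store must not be expanded into a full-width read-modify-write, which could fault past the accessible region or race with another thread that owns the masked-off bytes.

/* Compile with -mavx512f -mavx512vl.  Lanes 6..7 are masked off;
   expanding this into a plain 256-bit load/blend/store would both
   read and write d[6..7].  */
#include <immintrin.h>

void safe_store6 (int *d, __m256i v)
{
  __mmask8 m = 0x3f;                     /* low six lanes only */
  _mm256_mask_storeu_epi32 (d, m, v);    /* writes d[0..5] only */
}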
View file
_service:tar_scm:0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
Added
@@ -0,0 +1,38 @@
+From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Thu, 29 Jun 2023 03:10:35 +0000
+Subject: [PATCH 07/28] x86: Update model values for Alderlake and Rocketlake.
+
+Update model values for Alderlake and Rocketlake according to SDM.
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
+	from Rocketlake, remove model value 0xbf from Alderlake.
+---
+ gcc/common/config/i386/cpuinfo.h | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 0333da56b..28b2ff0b0 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
+       break;
+     case 0xa7:
+-    case 0xa8:
+       /* Rocket Lake.  */
+       cpu = "rocketlake";
+       CHECK___builtin_cpu_is ("corei7");
+@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       break;
+     case 0x97:
+     case 0x9a:
+-    case 0xbf:
+       /* Alder Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+--
+2.31.1
+
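These model tables feed the __builtin_cpu_is machinery used by function multi-versioning; a quick sketch of a runtime check whose answer depends on the corrected list:

/* Sketch: after this change, model 0xa8 CPUs no longer report as
   "rocketlake" and model 0xbf no longer as "alderlake".  */
#include <stdio.h>

int main (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_is ("alderlake"))
    puts ("running on Alder Lake");
  else if (__builtin_cpu_is ("rocketlake"))
    puts ("running on Rocket Lake");
  return 0;
}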
View file
_service:tar_scm:0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
Added
@@ -0,0 +1,78 @@ +From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 4 Aug 2023 09:27:39 +0800 +Subject: PATCH 08/28 Workaround possible CPUID bug in Sandy Bridge. + +Don't access leaf 7 subleaf 1 unless subleaf 0 says it is +supported via EAX. + +Intel documentation says invalid subleaves return 0. We had been +relying on that behavior instead of checking the max sublef number. + +It appears that some Sandy Bridge CPUs return at least the subleaf 0 +EDX value for subleaf 1. Best guess is that this is a bug in a +microcode patch since all of the bits we're seeing set in EDX were +introduced after Sandy Bridge was originally released. + +This is causing avxvnniint16 to be incorrectly enabled with +-march=native on these CPUs. + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_available_features): Check + max_subleaf_level for valid subleaf before use CPUID. +--- + gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------ + 1 file changed, 17 insertions(+), 12 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 28b2ff0b0..316ad3cb3 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model, + /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */ + if (max_cpuid_level >= 7) + { +- __cpuid_count (7, 0, eax, ebx, ecx, edx); ++ unsigned int max_subleaf_level; ++ ++ __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx); + if (ebx & bit_BMI) + set_feature (FEATURE_BMI); + if (ebx & bit_SGX) +@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model, + set_feature (FEATURE_AVX512FP16); + } + +- __cpuid_count (7, 1, eax, ebx, ecx, edx); +- if (eax & bit_HRESET) +- set_feature (FEATURE_HRESET); +- if (avx_usable) +- { +- if (eax & bit_AVXVNNI) +- set_feature (FEATURE_AVXVNNI); +- } +- if (avx512_usable) ++ if (max_subleaf_level >= 1) + { +- if (eax & bit_AVX512BF16) +- set_feature (FEATURE_AVX512BF16); ++ __cpuid_count (7, 1, eax, ebx, ecx, edx); ++ if (eax & bit_HRESET) ++ set_feature (FEATURE_HRESET); ++ if (avx_usable) ++ { ++ if (eax & bit_AVXVNNI) ++ set_feature (FEATURE_AVXVNNI); ++ } ++ if (avx512_usable) ++ { ++ if (eax & bit_AVX512BF16) ++ set_feature (FEATURE_AVX512BF16); ++ } + } + } + +-- +2.31.1 +
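A sketch of the defensive pattern the patch adopts, using the <cpuid.h> helpers: leaf 7 subleaf 0 reports the maximum valid subleaf in EAX, and subleaf 1 is only read when that maximum is at least 1, instead of trusting invalid subleaves to return zeros.

/* Standalone illustration of the fixed logic.  */
#include <cpuid.h>
#include <stdio.h>

int main (void)
{
  unsigned int eax, ebx, ecx, edx;
  unsigned int max_subleaf;

  if (__get_cpuid_count (7, 0, &max_subleaf, &ebx, &ecx, &edx))
    {
      if (max_subleaf >= 1)
        {
          __cpuid_count (7, 1, eax, ebx, ecx, edx);
          printf ("leaf 7 subleaf 1: eax=%#x\n", eax);
        }
      else
        puts ("leaf 7 subleaf 1 not architecturally defined here");
    }
  return 0;
}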
View file
_service:tar_scm:0263-Software-mitigation-Disable-gather-generation-in-vec.patch
Added
@@ -0,0 +1,220 @@ +From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 10 Aug 2023 11:41:39 +0800 +Subject: PATCH 09/28 Software mitigation: Disable gather generation in + vectorization for GDS affected Intel Processors. + +For more details of GDS (Gather Data Sampling), refer to +https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html + +After microcode update, there's performance regression. To avoid that, +the patch disables gather generation in autovectorization but uses +gather scalar emulation instead. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (m_GDS): New macro. + * config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't + enable for m_GDS. + (X86_TUNE_USE_GATHER_4PARTS): Ditto. + (X86_TUNE_USE_GATHER): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx2-gather-2.c: Adjust options to keep + gather vectorization. + * gcc.target/i386/avx2-gather-6.c: Ditto. + * gcc.target/i386/avx512f-pr88464-1.c: Ditto. + * gcc.target/i386/avx512f-pr88464-5.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-1.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-11.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-3.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-9.c: Ditto. + * gcc.target/i386/pr88531-1b.c: Ditto. + * gcc.target/i386/pr88531-1c.c: Ditto. + +(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d) +--- + gcc/config/i386/i386-options.cc | 5 +++++ + gcc/config/i386/x86-tune.def | 9 ++++++--- + gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +- + gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +- + 12 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index fb2ed942f..9617fc162 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3. If not see + #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS) + #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT) + #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL) ++/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828. ++ Software mitigation. */ ++#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \ ++ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ ++ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE) + + #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE) + #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index e6b9e2125..4392709fc 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 + elements. 
*/ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. */ +@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", + /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ +@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. */ +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +index ad5ef7310..978924b0f 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */ ++/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */ + + #include "avx2-gather-1.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +index b9119581a..067b251e3 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */ ++/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */ + + #include "avx2-gather-5.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +index 06d21bb01..d1a229861 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +index 462e951fd..d7b0b2b28 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { 
dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +index 55a28dddb..07439185e 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +index 969600885..3a9810827 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +index 6b0c8a859..ac669e048 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +index 3af568ab3..14a1083b6 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c +index 812c8a10f..e6df789de 100644 +--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c ++++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
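The loop shape this tuning change affects, as a sketch (not one of the adjusted tests): an indexed load that previously vectorized to a hardware gather under Skylake-class tuning is now emulated element by element on the m_GDS processors listed above.

/* Sketch: with -O3 -mtune=skylake-avx512 this loop no longer uses
   vgatherdps; with -mtune=haswell, as the adjusted tests use, it
   still does.  */
void indexed_load (float *__restrict dst, const float *src,
                   const int *idx, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = src[idx[i]];
}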
View file
_service:tar_scm:0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
Added
@@ -0,0 +1,187 @@ +From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 10 Aug 2023 16:26:13 +0800 +Subject: PATCH 10/28 Support -mno-gather -mno-scatter to enable/disable + vectorization for all gather/scatter instructions + +Rename original use_gather to use_gather_8parts, Support +-mtune-ctrl={,^}use_gather to set/clear tune features +use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather +as alias of -mtune-ctrl=, use_gather, ^use_gather. + +Similar for use_scatter. + +gcc/ChangeLog: + + * config/i386/i386-builtins.cc + (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts. + * config/i386/i386-options.cc (parse_mtune_ctrl_str): + Set/Clear tune features use_{gather,scatter}_{2parts, 4parts, + 8parts} for -mtune-crtl={,^}{use_gather,use_scatter}. + * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust + for use_scatter_8parts + * config/i386/i386.h (TARGET_USE_GATHER): Rename to .. + (TARGET_USE_GATHER_8PARTS): .. this. + (TARGET_USE_SCATTER): Rename to .. + (TARGET_USE_SCATTER_8PARTS): .. this. + * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to + (X86_TUNE_USE_GATHER_8PARTS): .. this. + (X86_TUNE_USE_SCATTER): Rename to + (X86_TUNE_USE_SCATTER_8PARTS): .. this. + * config/i386/i386.opt: Add new options mgather, mscatter. + +(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737) +--- + gcc/config/i386/i386-builtins.cc | 2 +- + gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++--------- + gcc/config/i386/i386.cc | 2 +- + gcc/config/i386/i386.h | 8 ++--- + gcc/config/i386/i386.opt | 4 +++ + gcc/config/i386/x86-tune.def | 4 +-- + 6 files changed, 52 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc +index 050c6228a..8ed32e14f 100644 +--- a/gcc/config/i386/i386-builtins.cc ++++ b/gcc/config/i386/i386-builtins.cc +@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, + ? !TARGET_USE_GATHER_2PARTS + : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u) + ? !TARGET_USE_GATHER_4PARTS +- : !TARGET_USE_GATHER))) ++ : !TARGET_USE_GATHER_8PARTS))) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 9617fc162..3df1f0c41 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump) + curr_feature_string++; + clear = true; + } +- for (i = 0; i < X86_TUNE_LAST; i++) +- { +- if (!strcmp (curr_feature_string, ix86_tune_feature_namesi)) +- { +- ix86_tune_featuresi = !clear; +- if (dump) +- fprintf (stderr, "Explicitly %s feature %s\n", +- clear ? "clear" : "set", ix86_tune_feature_namesi); +- break; +- } +- } +- if (i == X86_TUNE_LAST) +- error ("unknown parameter to option %<-mtune-ctrl%>: %s", +- clear ? curr_feature_string - 1 : curr_feature_string); ++ ++ if (!strcmp (curr_feature_string, "use_gather")) ++ { ++ ix86_tune_featuresX86_TUNE_USE_GATHER_2PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_GATHER_4PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_GATHER_8PARTS = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s features use_gather_2parts," ++ " use_gather_4parts, use_gather_8parts\n", ++ clear ? 
"clear" : "set"); ++ ++ } ++ else if (!strcmp (curr_feature_string, "use_scatter")) ++ { ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_2PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_4PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_8PARTS = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s features use_scatter_2parts," ++ " use_scatter_4parts, use_scatter_8parts\n", ++ clear ? "clear" : "set"); ++ } ++ else ++ { ++ for (i = 0; i < X86_TUNE_LAST; i++) ++ { ++ if (!strcmp (curr_feature_string, ix86_tune_feature_namesi)) ++ { ++ ix86_tune_featuresi = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s feature %s\n", ++ clear ? "clear" : "set", ix86_tune_feature_namesi); ++ break; ++ } ++ } ++ ++ if (i == X86_TUNE_LAST) ++ error ("unknown parameter to option %<-mtune-ctrl%>: %s", ++ clear ? curr_feature_string - 1 : curr_feature_string); ++ } + curr_feature_string = next_feature_string; + } + while (curr_feature_string); +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 479fc6010..e75d37023 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype, + ? !TARGET_USE_SCATTER_2PARTS + : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) + ? !TARGET_USE_SCATTER_4PARTS +- : !TARGET_USE_SCATTER)) ++ : !TARGET_USE_SCATTER_8PARTS)) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 688aaabd3..aaa136ba0 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_featuresX86_TUNE_LAST; + ix86_tune_featuresX86_TUNE_USE_GATHER_4PARTS + #define TARGET_USE_SCATTER_4PARTS \ + ix86_tune_featuresX86_TUNE_USE_SCATTER_4PARTS +-#define TARGET_USE_GATHER \ +- ix86_tune_featuresX86_TUNE_USE_GATHER +-#define TARGET_USE_SCATTER \ +- ix86_tune_featuresX86_TUNE_USE_SCATTER ++#define TARGET_USE_GATHER_8PARTS \ ++ ix86_tune_featuresX86_TUNE_USE_GATHER_8PARTS ++#define TARGET_USE_SCATTER_8PARTS \ ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_8PARTS + #define TARGET_FUSE_CMP_AND_BRANCH_32 \ + ix86_tune_featuresX86_TUNE_FUSE_CMP_AND_BRANCH_32 + #define TARGET_FUSE_CMP_AND_BRANCH_64 \ +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index 498fb454d..b154110d8 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated. + munroll-only-small-loops + Target Var(ix86_unroll_only_small_loops) Init(0) Save + Enable conservative small loop unrolling. ++ ++mscatter ++Target Alias(mtune-ctrl=, use_scatter, ^use_scatter) ++Enable vectorization for scatter instruction. +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 4392709fc..bdb455d20 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ +-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", ++DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE + | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. 
*/ +-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter", ++DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", + ~(m_ZNVER4)) + + /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or +-- +2.31.1 +
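Usage sketch (option spellings as described in the ChangeLog above): the new aliases flip all three use_{gather,scatter}_{2,4,8}parts tuning flags at once, so mitigation-aware users can opt back in or out explicitly.

/* Sketch: build with
     gcc -O3 -march=skylake-avx512 -mtune-ctrl=use_gather scatter.c
   (or the -mgather alias per the ChangeLog) to re-enable gather
   vectorization despite the GDS tuning default; -mno-scatter likewise
   disables scatter generation everywhere.  */
void scatter_store (float *dst, const int *idx,
                    const float *__restrict src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[idx[i]] = src[i];
}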
View file
_service:tar_scm:0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
Added
@@ -0,0 +1,129 @@ +From 764518a35e90a3e13c469275da9c3c7002fe1982 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 8 Sep 2023 09:22:43 +0800 +Subject: PATCH 11/28 Remove constraint modifier % for + fcmaddcph/fmaddcph/fcmulcph since there're not commutative. + +gcc/ChangeLog: + + PR target/111306 + PR target/111335 + * config/i386/sse.md (int_comm): New int_attr. + (fma_<complexopname>_<mode><sdc_maskz_name><round_name>): + Remove % for Complex conjugate operations since they're not + commutative. + (fma_<complexpairopname>_<mode>_pair): Ditto. + (<avx512>_<complexopname>_<mode>_mask<round_name>): Ditto. + (cmul<conj_op><mode>3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr111306.c: New test. + +(cherry picked from commit f197392a16ffb1327f1d12ff8ff05f9295e015cb) +--- + gcc/config/i386/sse.md | 16 ++++++++--- + gcc/testsuite/gcc.target/i386/pr111306.c | 36 ++++++++++++++++++++++++ + 2 files changed, 48 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 3af159896..f25dd5f2b 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -6318,6 +6318,14 @@ + (UNSPEC_COMPLEX_FMA_PAIR "fmaddc") + (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")) + ++(define_int_attr int_comm ++ (UNSPEC_COMPLEX_FMA "") ++ (UNSPEC_COMPLEX_FMA_PAIR "") ++ (UNSPEC_COMPLEX_FCMA "") ++ (UNSPEC_COMPLEX_FCMA_PAIR "") ++ (UNSPEC_COMPLEX_FMUL "%") ++ (UNSPEC_COMPLEX_FCMUL "")) ++ + (define_int_attr conj_op + (UNSPEC_COMPLEX_FMA "") + (UNSPEC_COMPLEX_FCMA "_conj") +@@ -6431,7 +6439,7 @@ + (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>" + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>") + (match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0") + UNSPEC_COMPLEX_F_C_MA)) +@@ -6495,7 +6503,7 @@ + (define_insn "fma_<complexpairopname>_<mode>_pair" + (set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v") + (unspec:VF1_AVX512VL +- (match_operand:VF1_AVX512VL 1 "vector_operand" "%v") ++ (match_operand:VF1_AVX512VL 1 "vector_operand" "<int_comm>v") + (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr") + (match_operand:VF1_AVX512VL 3 "vector_operand" "0") + UNSPEC_COMPLEX_F_C_MA_PAIR)) +@@ -6562,7 +6570,7 @@ + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (vec_merge:VF_AVX512FP16VL + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>") + (match_operand:VF_AVX512FP16VL 3 "register_operand" "0") + UNSPEC_COMPLEX_F_C_MA) +@@ -6586,7 +6594,7 @@ + (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>" + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>") + UNSPEC_COMPLEX_F_C_MUL)) + "TARGET_AVX512FP16 && <round_mode512bit_condition>" +diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c 
b/gcc/testsuite/gcc.target/i386/pr111306.c
+new file mode 100644
+index 000000000..541725ebd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr111306.c
+@@ -0,0 +1,36 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
++/* { dg-require-effective-target avx512fp16 } */
++
++#define AVX512FP16
++#include "avx512f-helper.h"
++
++__attribute__((optimize("O2"),noipa))
++void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
++  __m512h rA = _mm512_loadu_ph(a);
++  for (int i = 0; i < n; i += 32) {
++    __m512h rB = _mm512_loadu_ph(b + i);
++    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
++  }
++}
++
++void
++test_512 (void)
++{
++  int n = 32;
++  _Float16 a[n], b[n], c[n];
++  _Float16 exp[n];
++  for (int i = 1; i <= n; i++) {
++    a[i - 1] = i & 1 ? -i : i;
++    b[i - 1] = i;
++  }
++
++  func1(a, b, n, c);
++  for (int i = 0; i < n / 32; i += 2) {
++    if (c[i] != a[i] * b[i] + a[i+1] * b[i+1]
++	|| c[i+1] != a[i] * b[i+1] - a[i+1]*b[i])
++      __builtin_abort ();
++  }
++}
++
++
+--
+2.31.1
+
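A worked check of why the conjugate forms must not carry the '%' commutativity modifier: swapping the operands of a * conj(b) conjugates the result, so the two orders are genuinely different values. Standalone sketch (not the testcase):

/* For a = 1+2i and b = 3+4i:
     a * conj(b) = 11 + 2i,  conj(a) * b = 11 - 2i,
   so letting the register allocator swap operands 1 and 2 of
   fcmulcph would silently negate the imaginary part.  */
#include <complex>
#include <cstdio>

int main ()
{
  std::complex<float> a (1.0f, 2.0f), b (3.0f, 4.0f);
  std::complex<float> p = a * std::conj (b);
  std::complex<float> q = std::conj (a) * b;
  std::printf ("a*conj(b) = (%g, %g)\n", p.real (), p.imag ());
  std::printf ("conj(a)*b = (%g, %g)\n", q.real (), q.imag ());
  return 0;
}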
View file
_service:tar_scm:0266-Disparage-slightly-for-the-alternative-which-move-DF.patch
Added
@@ -0,0 +1,106 @@ +From afd539adfe762adb57863299a11987b7e20e7987 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Wed, 5 Jul 2023 13:45:11 +0800 +Subject: PATCH 12/28 Disparage slightly for the alternative which move + DFmode between SSE_REGS and GENERAL_REGS. + +For testcase + +void __cond_swap(double* __x, double* __y) { + bool __r = (*__x < *__y); + auto __tmp = __r ? *__x : *__y; + *__y = __r ? *__y : *__x; + *__x = __tmp; +} + +GCC-14 with -O2 and -march=x86-64 options generates the following code: + +__cond_swap(double*, double*): + movsd xmm1, QWORD PTR rdi + movsd xmm0, QWORD PTR rsi + comisd xmm0, xmm1 + jbe .L2 + movq rax, xmm1 + movapd xmm1, xmm0 + movq xmm0, rax +.L2: + movsd QWORD PTR rsi, xmm1 + movsd QWORD PTR rdi, xmm0 + ret + +rax is used to save and restore DFmode value. In RA both GENERAL_REGS +and SSE_REGS cost zero since we didn't disparage the +alternative in movdf_internal pattern, according to register +allocation order, GENERAL_REGS is allocated. The patch add ? for +alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal +pattern, after that we get optimal RA. + +__cond_swap: +.LFB0: + .cfi_startproc + movsd (%rdi), %xmm1 + movsd (%rsi), %xmm0 + comisd %xmm1, %xmm0 + jbe .L2 + movapd %xmm1, %xmm2 + movapd %xmm0, %xmm1 + movapd %xmm2, %xmm0 +.L2: + movsd %xmm1, (%rsi) + movsd %xmm0, (%rdi) + ret + +gcc/ChangeLog: + + PR target/110170 + * config/i386/i386.md (movdf_internal): Disparage slightly for + 2 alternatives (r,v) and (v,r) by adding constraint modifier + '?'. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110170-3.c: New test. + +(cherry picked from commit 37a231cc7594d12ba0822077018aad751a6fb94e) +--- + gcc/config/i386/i386.md | 4 ++-- + gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++ + 2 files changed, 13 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index be07be10d..71691f598 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -3582,9 +3582,9 @@ + ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7. + (define_insn "*movdf_internal" + (set (match_operand:DF 0 "nonimmediate_operand" +- "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m") ++ "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m") + (match_operand:DF 1 "general_operand" +- "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC")) ++ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC")) + "!(MEM_P (operands0) && MEM_P (operands1)) + && (lra_in_progress || reload_completed + || !CONST_DOUBLE_P (operands1) +diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c +new file mode 100644 +index 000000000..70daa89e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */ ++/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */ ++ ++void __cond_swap(double* __x, double* __y) { ++ _Bool __r = (*__x < *__y); ++ double __tmp = __r ? *__x : *__y; ++ *__y = __r ? *__y : *__x; ++ *__x = __tmp; ++} ++ +-- +2.31.1 +
_service:tar_scm:0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
Added
@@ -0,0 +1,163 @@
+From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 9 Nov 2023 13:20:05 +0800
+Subject: [PATCH 13/28] Fix wrong code due to vec_merge + pcmp to blendvb
+ splitter.
+
+gcc/ChangeLog:
+
+	PR target/112443
+	* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
+	from LT to GT since there's no NOT in the pattern.
+	(*avx2_pcmp<mode>3_5): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/pr112443.C: New test.
+
+(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
+---
+ gcc/config/i386/sse.md                   |   4 ++--
+ gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
+ 2 files changed, 110 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
+
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index f25dd5f2b..23b858ab2 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -16358,7 +16358,7 @@
+ 	     (match_dup 4))
+ 	  UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+   operands[3] = gen_lowpart (<MODE>mode, operands[3]);
+ })
+@@ -16388,7 +16388,7 @@
+ 	     (match_dup 4))
+ 	  UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+ })
+
+diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
+new file mode 100644
+index 000000000..ebfa9b4a7
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/pr112443.C
+@@ -0,0 +1,108 @@
++/* { dg-do run } */
++/* { dg-require-effective-target avx512bw } */
++/* { dg-require-effective-target avx512vl } */
++/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
++
++#include <cstdint>
++#include <x86intrin.h>
++#include <functional>
++#include <ostream>
++
++#define AVX512BW
++#define AVX512VL
++
++#include "avx512f-helper.h"
++
++struct TensorIteratorBase{
++  char* in;
++  char* out;
++
++  void for_each(std::function<void(char*, char*, int64_t size)> loop){
++    loop(out, in, 32);
++  }
++};
++
++class Vectorized {
++protected:
++  __m256i values;
++
++  static inline __m256i invert(const __m256i& v) {
++    const auto ones = _mm256_set1_epi64x(-1);
++    return _mm256_xor_si256(ones, v);
++  }
++public:
++  operator __m256i() const {
++    return values;
++  }
++
++  static constexpr int size() {
++    return 32;
++  }
++
++  Vectorized() {}
++  Vectorized(__m256i v) : values(v) {}
++  Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
++  static Vectorized blendv(const Vectorized& a, const Vectorized& b,
++			   const Vectorized& mask) {
++    return _mm256_blendv_epi8(a, b, mask);
++  }
++  static Vectorized loadu(const void* ptr) {
++    return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
++  }
++  void store(void* ptr) const {
++    _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
++  }
++
++  Vectorized operator<(const Vectorized& other) const {
++    __m256i max = _mm256_max_epu8(values, other);
++    return invert(_mm256_cmpeq_epi8(max, values));
++  }
++  Vectorized operator-(const Vectorized& b) {
++    return _mm256_sub_epi8(values, b);
++  }
++};
++
++std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
++  uint8_t buf[Vectorized::size()];
++  vec.store(buf);
++  stream << "vec[";
++  for (int i = 0; i != Vectorized::size(); i++) {
++    if (i != 0)
++      stream << ", ";
++    stream << buf[i]*1;
++  }
++  stream << "]";
++  return stream;
++}
++
++void run(TensorIteratorBase iter){
++  Vectorized zero_vec(0);
++  Vectorized one_vec(1);
++
++  iter.for_each([=](char* out, char* in, int64_t size) {
++    for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
++      auto self_vec = Vectorized::loadu(in + i);
++      auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
++      auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
++      auto outv = left - right;
++      outv.store(out + i);
++    }
++  });
++}
++
++void
++test_256 (){
++  char in[32];
++  char out[32];
++  for(auto& x: in) x = 1;
++  run(TensorIteratorBase{in, out});
++  Vectorized::loadu (out);
++  for (int i = 0; i != 32; i++)
++    if (out[i] != 1)
++      __builtin_abort ();
++}
++
++void
++test_128 ()
++{
++}
+--
+2.31.1
+
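Note: vpblendvb selects each byte from its second source when the corresponding mask byte's sign bit is set, so the test's vector loop computes the byte-wise sign function. A scalar model of one lane (illustrative name, not part of the patch):

  /* Scalar model of the pr112443.C loop: two blends build 0/1 masks,
     and left - right is sign(x); for x = 1 the result must be 1.  */
  static signed char
  sign_ref (signed char x)
  {
    signed char left  = (0 < x) ? 1 : 0;   /* blendv (zero, one, 0 < x) */
    signed char right = (x < 0) ? 1 : 0;   /* blendv (zero, one, x < 0) */
    return (signed char) (left - right);
  }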
_service:tar_scm:0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
Added
@@ -0,0 +1,151 @@
+From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 7 Dec 2023 09:17:27 +0800
+Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose
+ abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS.
+
+If the function doesn't clobber any SSE registers, or only clobbers the
+128-bit part, then vzeroupper isn't issued before the function exit:
+the status is not CLEAN but ANY after the function.
+
+Also, for a sibling call it's safe to issue a vzeroupper, and a
+vzeroupper could otherwise be missing since there's no mode_exit for
+sibling_call_p.
+
+gcc/ChangeLog:
+
+	PR target/112891
+	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
+	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
+	align with ix86_avx_u128_mode_needed.
+	(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
+	sibling_call.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/i386/pr112891.c: New test.
+	* gcc.target/i386/pr112891-2.c: New test.
+
+(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
+---
+ gcc/config/i386/i386.cc                    | 22 +++++++++++++---
+ gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
+ gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++
+ 3 files changed, 78 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
+
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index e75d37023..60f3296b0 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
+ 	 modes wider than 256 bits.  It's only safe to issue a
+ 	 vzeroupper if all SSE registers are clobbered.  */
+       const function_abi &abi = insn_callee_abi (insn);
+-      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+-				  abi.mode_clobbers (V4DImode)))
++      /* Should be safe to issue a vzeroupper before sibling_call_p.
++	 Also there is no mode_exit for sibling_call, so there could be
++	 a missing vzeroupper for that.  */
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
+ 	return AVX_U128_ANY;
+
+       return AVX_U128_CLEAN;
+@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
+       bool avx_upper_reg_found = false;
+       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
+
+-      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
++      if (avx_upper_reg_found)
++	return AVX_U128_DIRTY;
++
++      /* If the function doesn't clobber any sse registers or only clobbers
++	 the 128-bit part, then vzeroupper isn't issued before the function
++	 exit: the status is not CLEAN but ANY after the function.  */
++      const function_abi &abi = insn_callee_abi (insn);
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
++	return AVX_U128_ANY;
++
++      return AVX_U128_CLEAN;
+     }
+
+   /* Otherwise, return current mode.  Remember that if insn
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+new file mode 100644
+index 000000000..164c3985d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++double
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++  return a[5] + b[5];
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  double c = foo (a, b);
++  return __builtin_exp (c);
++}
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
+new file mode 100644
+index 000000000..dbf6c6794
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++void
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  foo (a, b);
++  return __builtin_exp (b[1]);
++}
+--
+2.31.1
+
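Note: the point of the fix is that after a call whose ABI does not clobber all SSE registers, the upper-128-bit state is unknown, so mode_after must report ANY rather than CLEAN. A sketch of the kind of code affected (the special ABI on bar is an assumption chosen for illustration):

  #include <immintrin.h>

  /* Assume bar uses an ABI that preserves vector registers, e.g.
     no_caller_saved_registers, so a call to it neither cleans nor
     necessarily dirties the YMM uppers.  */
  extern void bar (void) __attribute__ ((no_caller_saved_registers));

  void
  foo (float *p)
  {
    __m256 v = _mm256_loadu_ps (p);   /* uppers become dirty */
    _mm256_storeu_ps (p, v);
    bar ();                           /* state afterwards: ANY, not CLEAN */
  }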
_service:tar_scm:0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
Added
@@ -0,0 +1,142 @@
+From 19ee37b11702c86d7ed271e9e1d00e23cc4ab93c Mon Sep 17 00:00:00 2001
+From: Jan Hubicka <jh@suse.cz>
+Date: Fri, 29 Dec 2023 23:51:03 +0100
+Subject: [PATCH 15/28] Disable FMADD in chains for Zen4 and generic
+
+This patch disables use of FMA in the matrix multiplication loop for
+generic (for x86-64-v3) and zen4.  I tested this on zen4 and a Xeon
+Gold 6212U.
+
+For Intel this is neutral both on the matrix multiplication
+microbenchmark (attached) and spec2k17, where the difference was within
+noise for Core.
+
+On Core the micro-benchmark runs as follows:
+
+With FMA:
+
+     578,500,241      cycles:u        #  3.645 GHz               ( +-  0.12% )
+     753,318,477      instructions:u  #  1.30  insn per cycle    ( +-  0.00% )
+     125,417,701      branches:u      #  790.227 M/sec           ( +-  0.00% )
+        0.159146 +- 0.000363 seconds time elapsed  ( +-  0.23% )
+
+No FMA:
+
+     577,573,960      cycles:u        #  3.514 GHz               ( +-  0.15% )
+     878,318,479      instructions:u  #  1.52  insn per cycle    ( +-  0.00% )
+     125,417,702      branches:u      #  763.035 M/sec           ( +-  0.00% )
+        0.164734 +- 0.000321 seconds time elapsed  ( +-  0.19% )
+
+So the cycle count is unchanged, and a discrete multiply+add takes the
+same time as FMA.
+
+While on Zen:
+
+With FMA:
+      484875179      cycles:u         #  3.599 GHz               ( +-  0.05% ) (82.11%)
+      752031517      instructions:u   #  1.55  insn per cycle
+      125106525      branches:u       #  928.712 M/sec           ( +-  0.03% ) (85.09%)
+         128356      branch-misses:u  #  0.10% of all branches   ( +-  0.06% ) (83.58%)
+
+No FMA:
+      375875209      cycles:u         #  3.592 GHz               ( +-  0.08% ) (80.74%)
+      875725341      instructions:u   #  2.33  insn per cycle
+      124903825      branches:u       #  1.194 G/sec             ( +-  0.04% ) (84.59%)
+       0.105203 +- 0.000188 seconds time elapsed  ( +-  0.18% )
+
+The difference is that Core understands the fact that fmadd does not
+need all three parameters to start computation, while Zen cores don't.
+
+Since this seems a noticeable win on Zen and not a loss on Core, it
+seems like a good default for generic.
+
+float a[SIZE][SIZE];
+float b[SIZE][SIZE];
+float c[SIZE][SIZE];
+
+void init(void)
+{
+   int i, j, k;
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         a[i][j] = (float)i + j;
+         b[i][j] = (float)i - j;
+         c[i][j] = 0.0f;
+      }
+   }
+}
+
+void mult(void)
+{
+   int i, j, k;
+
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         for(k=0; k<SIZE; ++k)
+         {
+            c[i][j] += a[i][k] * b[k][j];
+         }
+      }
+   }
+}
+
+int main(void)
+{
+   clock_t s, e;
+
+   init();
+   s=clock();
+   mult();
+   e=clock();
+   printf(" mult took %10d clocks\n", (int)(e-s));
+
+   return 0;
+
+}
+
+gcc/ChangeLog:
+
+	* config/i386/x86-tune.def (X86_TUNE_AVOID_128FMA_CHAINS,
+	X86_TUNE_AVOID_256FMA_CHAINS): Enable for znver4 and generic.
+---
+ gcc/config/i386/x86-tune.def | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
+index bdb455d20..fd095f3ec 100644
+--- a/gcc/config/i386/x86-tune.def
++++ b/gcc/config/i386/x86-tune.def
+@@ -499,12 +499,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
+
+ /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
+    smaller FMA chain.  */
+-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
++DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2
++	  | m_ZNVER3 | m_ZNVER4 | m_GENERIC)
+
+ /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
+    smaller FMA chain.  */
+ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3
+-	  | m_ALDERLAKE | m_SAPPHIRERAPIDS)
++	  | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_GENERIC)
+
+ /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
+    smaller FMA chain.  */
+--
+2.31.1
+
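Note: the tuning targets loops where the accumulator forms a dependency chain. With FMA, each iteration's fused operation waits the full FMA latency on the previous one; with a separate multiply and add, the multiplies run ahead and only the add stays on the critical path. A minimal example of such a chain (illustrative, not from the patch):

  /* With X86_TUNE_AVOID_128FMA_CHAINS active (e.g. -mtune=znver4 after
     this patch), GCC should prefer separate mul+add here instead of a
     fused fma, shortening the loop-carried dependency to the add alone.  */
  float
  dot_chain (const float *a, const float *b, int n)
  {
    float c = 0.0f;
    for (int i = 0; i < n; i++)
      c = a[i] * b[i] + c;   /* accumulator chain through c */
    return c;
  }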
_service:tar_scm:0270-Initial-Raptorlake-Support.patch
Added
@@ -0,0 +1,47 @@
+From 411d1f0bcc0d1c8018fdf5fe84ad2404929556ec Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 16 Sep 2022 13:59:01 +0800
+Subject: [PATCH 16/28] Initial Raptorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Raptorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Raptorlake.
+
+(cherry picked from commit 470a0659b508d684148f362c4dc0eccf5a83a23e)
+---
+ gcc/common/config/i386/cpuinfo.h      | 2 ++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 316ad3cb3..13d0f4cd8 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -508,6 +508,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
+     case 0x97:
+     case 0x9a:
+       /* Alder Lake.  */
++    case 0xb7:
++      /* Raptor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index f650e255f..c1d700f89 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1939,6 +1939,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
+   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
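Note: because Raptor Lake reuses PROCESSOR_ALDERLAKE and the INTEL_COREI7_ALDERLAKE subtype, runtime dispatch on such parts keys on the "alderlake" name. A small check (sketch; requires a compiler with this patch applied):

  int
  running_on_alderlake_class (void)
  {
    __builtin_cpu_init ();
    /* True on Alder Lake and, with this patch, on Raptor Lake too.  */
    return __builtin_cpu_is ("alderlake");
  }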
_service:tar_scm:0271-Initial-Meteorlake-Support.patch
Added
@@ -0,0 +1,49 @@
+From 87cea29ede520f4a5af01dff7071ab1d23bd47b5 Mon Sep 17 00:00:00 2001
+From: "Hu, Lin1" <lin1.hu@intel.com>
+Date: Fri, 16 Sep 2022 11:25:13 +0800
+Subject: [PATCH 17/28] Initial Meteorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Meteorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Meteorlake.
+
+(cherry picked from commit fd206f0e95fb6f41b96eaaaab1dc0c30378e5e08)
+---
+ gcc/common/config/i386/cpuinfo.h      | 4 ++++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 13d0f4cd8..37af92d6b 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -510,6 +510,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       /* Alder Lake.  */
+     case 0xb7:
+       /* Raptor Lake.  */
++    case 0xb5:
++    case 0xaa:
++    case 0xac:
++      /* Meteor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c1d700f89..cfee672fb 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1941,6 +1941,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
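Note: the case values matched above (0xb5, 0xaa, 0xac, ...) are the display model from CPUID leaf 1, i.e. the base model in bits 7:4 extended by bits 19:16 for family 6. A sketch of the standard decoding (not code from the patch):

  #include <cpuid.h>
  #include <stdio.h>

  int
  main (void)
  {
    unsigned eax, ebx, ecx, edx;
    if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
      return 1;
    unsigned family = (eax >> 8) & 0xf;
    unsigned model = (eax >> 4) & 0xf;
    if (family == 0x6 || family == 0xf)
      model += ((eax >> 16) & 0xf) << 4;   /* extended model bits */
    printf ("family 0x%x, model 0x%x\n", family, model);
    return 0;
  }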
_service:tar_scm:0272-Support-Intel-AMX-FP16-ISA.patch
Added
@@ -0,0 +1,691 @@
+From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001
+From: Hongyu Wang <hongyu.wang@intel.com>
+Date: Fri, 4 Nov 2022 15:50:55 +0800
+Subject: [PATCH 18/28] Support Intel AMX-FP16 ISA
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features): Detect
+	amx-fp16.
+	* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
+	OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
+	(ix86_handle_option): Handle -mamx-fp16.
+	* common/config/i386/i386-cpuinfo.h (enum processor_features):
+	Add FEATURE_AMX_FP16.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
+	amx-fp16.
+	* config.gcc: Add amxfp16intrin.h.
+	* config/i386/cpuid.h (bit_AMX_FP16): New.
+	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
+	__AMX_FP16__.
+	* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
+	* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
+	(ix86_valid_target_attribute_inner_p): Add new ATTR.
+	(ix86_option_override_internal): Handle AMX-FP16.
+	* config/i386/i386.opt: Add -mamx-fp16.
+	* config/i386/immintrin.h: Include amxfp16intrin.h.
+	* doc/extend.texi: Document -mamx-fp16.
+	* doc/invoke.texi: Document amx-fp16.
+	* doc/sourcebuild.texi: Document amx_fp16.
+	* config/i386/amxfp16intrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mamx-fp16.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/sse-12.c: Ditto.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-14.c: Ditto.
+	* gcc.target/i386/sse-22.c: Ditto.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* lib/target-supports.exp: (check_effective_target_amx_fp16):
+	New proc.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/amx-check.h: Add AMX_FP16.
+	* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
+	* gcc.target/i386/amxfp16-asmatt-1.c: New test.
+	* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
+	* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
+
+Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
+
+(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226)
+---
+ gcc/common/config/i386/cpuinfo.h              |  5 ++
+ gcc/common/config/i386/i386-common.cc         | 15 +++++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  1 +
+ gcc/config.gcc                                |  3 +-
+ gcc/config/i386/amxfp16intrin.h               | 46 ++++++++++++++
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.opt                      |  4 ++
+ gcc/config/i386/immintrin.h                   |  2 +
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  9 ++-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
+ gcc/testsuite/gcc.target/i386/amx-helper.h    | 61 +++++++++++++++++++
+ .../gcc.target/i386/amxfp16-asmatt-1.c        | 13 ++++
+ .../gcc.target/i386/amxfp16-asmintel-1.c      | 10 +++
+ .../gcc.target/i386/amxfp16-dpfp16ps-2.c      | 57 +++++++++++++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
+ gcc/testsuite/lib/target-supports.exp         | 11 ++++
+ 29 files changed, 262 insertions(+), 13 deletions(-)
+ create mode 100644 gcc/config/i386/amxfp16intrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 37af92d6b..5951a30aa 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model,
+ 	    set_feature (FEATURE_AVX512BF16);
+ 	}
+     }
++      if (amx_usable)
++	{
++	  if (eax & bit_AMX_FP16)
++	    set_feature (FEATURE_AMX_FP16);
++	}
+     }
+
+   /* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1).  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index cfee672fb..922db33ee 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
+ #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
++#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_KL_UNSET \
+   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
++#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mamx_fp16:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 82996ebb3..8f22897de 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -240,6 +240,7 @@ enum processor_features
+   FEATURE_X86_64_V2,
+   FEATURE_X86_64_V3,
+   FEATURE_X86_64_V4,
++  FEATURE_AMX_FP16,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 2d0646a68..95bab6da2 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
++  ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 4a0ae9328..e2b4a23dc 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+-		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
++		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
++		       amxfp16intrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h
+new file mode 100644
+index 000000000..6a114741a
+--- /dev/null
++++ b/gcc/config/i386/amxfp16intrin.h
+@@ -0,0 +1,46 @@
++/* Copyright (C) 2020 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
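Note: for orientation, AMX-FP16 adds tdpfp16ps, exposed by the new amxfp16intrin.h as _tile_dpfp16ps, which accumulates FP16 dot-product pairs into an FP32 tile. A compile-only sketch (tile configuration and the Linux XSTATE permission request are omitted, so this is not runnable as-is on real hardware):

  /* Build with: gcc -O2 -mamx-tile -mamx-fp16 -c amx_fp16_sketch.c  */
  #include <immintrin.h>

  void
  amx_fp16_kernel (void)
  {
    /* ... _tile_loadconfig () and tile loads would go here ... */
    _tile_dpfp16ps (0, 1, 2);   /* tmm0 += tmm1 . tmm2 (FP16 pairs -> FP32) */
    _tile_release ();
  }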
_service:tar_scm:0273-Support-Intel-prefetchit0-t1.patch
Added
@@ -0,0 +1,902 @@
+From 42a38c8abaa28f67e26b9af3f434fe0107894e7d Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 4 Nov 2022 15:01:05 +0800
+Subject: [PATCH 19/28] Support Intel prefetchit0/t1
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features):
+	Detect PREFETCHI.
+	* common/config/i386/i386-common.cc
+	(OPTION_MASK_ISA2_PREFETCHI_SET,
+	OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
+	(ix86_handle_option): Handle -mprefetchi.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_features): Add FEATURE_PREFETCHI.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY
+	for prefetchi.
+	* config.gcc: Add prfchiintrin.h.
+	* config/i386/cpuid.h (bit_PREFETCHI): New.
+	* config/i386/i386-builtin-types.def:
+	Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
+	and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
+	* config/i386/i386-builtin.def (BDESC): Add new builtins.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Define __PREFETCHI__.
+	* config/i386/i386-expand.cc: Handle new builtins.
+	* config/i386/i386-isa.def (PREFETCHI):
+	Add DEF_PTA(PREFETCHI).
+	* config/i386/i386-options.cc
+	(ix86_valid_target_attribute_inner_p): Handle prefetchi.
+	* config/i386/i386.md (prefetchi): New define_insn.
+	* config/i386/i386.opt: Add option -mprefetchi.
+	* config/i386/predicates.md (local_func_symbolic_operand):
+	New predicates.
+	* config/i386/x86gprintrin.h: Include prfchiintrin.h.
+	* config/i386/xmmintrin.h (enum _mm_hint): New enum for
+	prefetchi.
+	(_mm_prefetch): Handle the highest bit of enum.
+	* doc/extend.texi: Document prefetchi.
+	* doc/invoke.texi: Document -mprefetchi.
+	* doc/sourcebuild.texi: Document target prefetchi.
+	* config/i386/prfchiintrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mprefetchi.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/avx-1.c: Ditto.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/sse-13.c: Add -mprefetchi.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* gcc.target/i386/x86gprintrin-1.c: Ditto.
+	* gcc.target/i386/x86gprintrin-2.c: Ditto.
+	* gcc.target/i386/x86gprintrin-3.c: Ditto.
+	* gcc.target/i386/x86gprintrin-4.c: Ditto.
+	* gcc.target/i386/x86gprintrin-5.c: Ditto.
+	* gcc.target/i386/prefetchi-1.c: New test.
+	* gcc.target/i386/prefetchi-2.c: Ditto.
+	* gcc.target/i386/prefetchi-3.c: Ditto.
+	* gcc.target/i386/prefetchi-4.c: Ditto.
+
+Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
+---
+ gcc/common/config/i386/cpuinfo.h              |  2 +
+ gcc/common/config/i386/i386-common.cc         | 15 ++++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  1 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-builtin-types.def        |  4 +
+ gcc/config/i386/i386-builtin.def              |  4 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-expand.cc                | 77 +++++++++++++++++++
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.md                       | 23 ++++++
+ gcc/config/i386/i386.opt                      |  4 +
+ gcc/config/i386/predicates.md                 | 15 ++++
+ gcc/config/i386/prfchiintrin.h                | 49 ++++++++++++
+ gcc/config/i386/x86gprintrin.h                |  2 +
+ gcc/config/i386/xmmintrin.h                   |  7 +-
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  7 +-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/avx-1.c         |  4 +-
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 40 ++++++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 +++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 20 +++++
+ gcc/testsuite/gcc.target/i386/prefetchi-4.c   | 19 +++++
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  4 +-
+ .../gcc.target/i386/x86gprintrin-1.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-2.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-3.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-4.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-5.c          |  2 +-
+ 36 files changed, 343 insertions(+), 19 deletions(-)
+ create mode 100644 gcc/config/i386/prfchiintrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-4.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 5951a30aa..f17e88144 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -772,6 +772,8 @@ get_available_features (struct __processor_model *cpu_model,
+       __cpuid_count (7, 1, eax, ebx, ecx, edx);
+       if (eax & bit_HRESET)
+ 	set_feature (FEATURE_HRESET);
++      if (edx & bit_PREFETCHI)
++	set_feature (FEATURE_PREFETCHI);
+       if (avx_usable)
+ 	{
+ 	  if (eax & bit_AVXVNNI)
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 922db33ee..c8cf532cf 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
++#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -277,6 +278,7 @@ along with GCC; see the file COPYING3.  If not see
+   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+ #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
++#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1140,6 +1142,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mprefetchi:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 8f22897de..95b078acf 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -241,6 +241,7 @@ enum processor_features
+   FEATURE_X86_64_V3,
+   FEATURE_X86_64_V4,
+   FEATURE_AMX_FP16,
++  FEATURE_PREFETCHI,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 95bab6da2..6caf06249 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -176,4 +176,5 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
++  ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e2b4a23dc..81012c651 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+ 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+-		       amxfp16intrin.h"
++		       amxfp16intrin.h prfchiintrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
+index d6cd8d1bf..21100149a 100644
+--- a/gcc/config/i386/cpuid.h
++++ b/gcc/config/i386/cpuid.h
+@@ -50,6 +50,7 @@
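Note: the new intrinsics prefetch code rather than data; prefetchit0/t1 only accept RIP-relative addresses, so the typical operand is a function symbol. A sketch assuming the _m_prefetchit0/_m_prefetchit1 names from the new prfchiintrin.h (hot_fn is an arbitrary example; build with -mprefetchi):

  #include <x86gprintrin.h>

  extern void hot_fn (void);

  void
  warm_icache (void)
  {
    _m_prefetchit0 ((void *) hot_fn);   /* prefetch code, T0 hint */
    _m_prefetchit1 ((void *) hot_fn);   /* prefetch code, T1 hint */
  }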
_service:tar_scm:0274-Initial-Granite-Rapids-Support.patch
Added
@@ -0,0 +1,277 @@
+From 7f0f8b585cf60b4c09bca42b5339995c2cc74633 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Mon, 7 Nov 2022 11:04:57 +0800
+Subject: [PATCH 20/28] Initial Granite Rapids Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h
+	(get_intel_cpu): Handle Granite Rapids.
+	* common/config/i386/i386-common.cc:
+	(processor_names): Add graniterapids.
+	(processor_alias_table): Ditto.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS.
+	* config.gcc: Add -march=graniterapids.
+	* config/i386/driver-i386.cc (host_detect_local_cpu):
+	Handle graniterapids.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Ditto.
+	* config/i386/i386-options.cc (m_GRANITERAPIDS): New.
+	(processor_cost_table): Add graniterapids.
+	* config/i386/i386.h (enum processor_type):
+	Add PROCESSOR_GRANITERAPIDS.
+	(PTA_GRANITERAPIDS): Ditto.
+	* doc/extend.texi: Add graniterapids.
+	* doc/invoke.texi: Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/mv16.C: Add graniterapids.
+	* gcc.target/i386/funcspec-56.inc: Handle new march.
+
+(cherry picked from commit 339ffc5a792dd66647392a235f2f7f6344c5359e)
+---
+ gcc/common/config/i386/cpuinfo.h              |  9 +++++++++
+ gcc/common/config/i386/i386-common.cc         |  3 +++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/driver-i386.cc                |  5 ++++-
+ gcc/config/i386/i386-c.cc                     |  7 +++++++
+ gcc/config/i386/i386-options.cc               |  4 +++-
+ gcc/config/i386/i386.h                        |  3 +++
+ gcc/doc/extend.texi                           |  3 +++
+ gcc/doc/invoke.texi                           | 11 +++++++++++
+ gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
+ 12 files changed, 52 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index f17e88144..1f75ff1ca 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -528,6 +528,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_type = INTEL_COREI7;
+       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+       break;
++    case 0xad:
++    case 0xae:
++      /* Granite Rapids.  */
++      cpu = "graniterapids";
++      CHECK___builtin_cpu_is ("corei7");
++      CHECK___builtin_cpu_is ("graniterapids");
++      cpu_model->__cpu_type = INTEL_COREI7;
++      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
++      break;
+     case 0x17:
+     case 0x1d:
+       /* Penryn.  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c8cf532cf..1aa163463 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1855,6 +1855,7 @@ const char *const processor_names[] =
+   "sapphirerapids",
+   "alderlake",
+   "rocketlake",
++  "graniterapids",
+   "intel",
+   "geode",
+   "k6",
+@@ -1973,6 +1974,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
++    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 95b078acf..7b2d4d242 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -92,6 +92,7 @@ enum processor_subtypes
+   AMDFAM19H_ZNVER3,
+   INTEL_COREI7_ROCKETLAKE,
+   AMDFAM19H_ZNVER4,
++  INTEL_COREI7_GRANITERAPIDS,
+   CPU_SUBTYPE_MAX
+ };
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 81012c651..9bad238e3 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -670,7 +670,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
+ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
+ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
+ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
+-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
+
+ # Additional x86 processors supported by --with-cpu=.  Each processor
+ # MUST be separated by exactly one space.
+diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
+index 3b5161aed..ea8c3d8d1 100644
+--- a/gcc/config/i386/driver-i386.cc
++++ b/gcc/config/i386/driver-i386.cc
+@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
+ 	  /* This is unknown family 0x6 CPU.  */
+ 	  if (has_feature (FEATURE_AVX))
+ 	    {
++	      /* Assume Granite Rapids.  */
++	      if (has_feature (FEATURE_AMX_FP16))
++		cpu = "graniterapids";
+ 	      /* Assume Tiger Lake */
+-	      if (has_feature (FEATURE_AVX512VP2INTERSECT))
++	      else if (has_feature (FEATURE_AVX512VP2INTERSECT))
+ 		cpu = "tigerlake";
+ 	      /* Assume Sapphire Rapids.  */
+ 	      else if (has_feature (FEATURE_TSXLDTRK))
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
+index 00880bd17..04f1dd682 100644
+--- a/gcc/config/i386/i386-c.cc
++++ b/gcc/config/i386/i386-c.cc
+@@ -242,6 +242,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	  def_or_undef (parse_in, "__sapphirerapids");
+ 	  def_or_undef (parse_in, "__sapphirerapids__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__graniterapids");
++	  def_or_undef (parse_in, "__graniterapids__");
++	  break;
+ 	case PROCESSOR_ALDERLAKE:
+ 	  def_or_undef (parse_in, "__alderlake");
+ 	  def_or_undef (parse_in, "__alderlake__");
+@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	case PROCESSOR_ROCKETLAKE:
+ 	  def_or_undef (parse_in, "__tune_rocketlake__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__tune_graniterapids__");
++	  break;
+ 	case PROCESSOR_INTEL:
+ 	case PROCESSOR_GENERIC:
+ 	  break;
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 724375f02..6645e3259 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -127,10 +127,11 @@ along with GCC; see the file COPYING3.  If not see
+ #define m_SAPPHIRERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_SAPPHIRERAPIDS)
+ #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
+ #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
++#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
+ #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
+-		       | m_ROCKETLAKE)
++		       | m_ROCKETLAKE | m_GRANITERAPIDS)
+ #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
+ #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
+ #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
+@@ -761,6 +762,7 @@ static const struct processor_costs *processor_cost_table[] =
+   &icelake_cost,
+   &alderlake_cost,
+   &icelake_cost,
++  &icelake_cost,
+   &intel_cost,
+   &geode_cost,
+   &k6_cost,
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index aaa136ba0..75953defc 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2250,6 +2250,7 @@ enum processor_type
+   PROCESSOR_SAPPHIRERAPIDS,
+   PROCESSOR_ALDERLAKE,
+   PROCESSOR_ROCKETLAKE,
++  PROCESSOR_GRANITERAPIDS,
+   PROCESSOR_INTEL,
+   PROCESSOR_GEODE,
+   PROCESSOR_K6,
+@@ -2356,6 +2357,8 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
++constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
++  | PTA_PREFETCHI;
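Note on -march=native after this change: host_detect_local_cpu keys its unknown-family-6 fallback for Granite Rapids off AMX-FP16. The same feature test is available to user code once the "amx-fp16" name exists in the ISA table (sketch; a heuristic, like the driver's own):

  int
  looks_like_graniterapids (void)
  {
    __builtin_cpu_init ();
    /* Mirrors the driver fallback above: AMX-FP16 suggests a Granite
       Rapids class part.  */
    return __builtin_cpu_supports ("amx-fp16");
  }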
_service:tar_scm:0275-Support-Intel-AMX-COMPLEX.patch
Added
@@ -0,0 +1,722 @@
+From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Wed, 31 May 2023 10:45:00 +0800
+Subject: [PATCH 21/28] Support Intel AMX-COMPLEX
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features):
+	Detect AMX-COMPLEX.
+	* common/config/i386/i386-common.cc
+	(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
+	OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
+	(ix86_handle_option): Handle -mamx-complex.
+	* common/config/i386/i386-cpuinfo.h (enum processor_features):
+	Add FEATURE_AMX_COMPLEX.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
+	amx-complex.
+	* config.gcc: Add amxcomplexintrin.h.
+	* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
+	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
+	__AMX_COMPLEX__.
+	* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
+	* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
+	Handle amx-complex.
+	* config/i386/i386.opt: Add option -mamx-complex.
+	* config/i386/immintrin.h: Include amxcomplexintrin.h.
+	* doc/extend.texi: Document amx-complex.
+	* doc/invoke.texi: Document -mamx-complex.
+	* doc/sourcebuild.texi: Document target amx-complex.
+	* config/i386/amxcomplexintrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mamx-complex.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
+	* gcc.target/i386/amx-helper.h: Add amx-complex support.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/sse-12.c: Add -mamx-complex.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-14.c: Ditto.
+	* gcc.target/i386/sse-22.c: Add amx-complex.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* lib/target-supports.exp (check_effective_target_amx_complex): New.
+	* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
+	* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
+	* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
+	* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
+---
+ gcc/common/config/i386/cpuinfo.h              |  2 +
+ gcc/common/config/i386/i386-common.cc         | 19 +++++-
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  2 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/amxcomplexintrin.h            | 59 +++++++++++++++++++
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.opt                      |  4 ++
+ gcc/config/i386/immintrin.h                   |  2 +
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  7 ++-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
+ gcc/testsuite/gcc.target/i386/amx-helper.h    |  4 +-
+ .../gcc.target/i386/amxcomplex-asmatt-1.c     | 15 +++++
+ .../gcc.target/i386/amxcomplex-asmintel-1.c   | 12 ++++
+ .../i386/amxcomplex-cmmimfp16ps-2.c           | 53 +++++++++++++++++
+ .../i386/amxcomplex-cmmrlfp16ps-2.c           | 53 +++++++++++++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
+ gcc/testsuite/lib/target-supports.exp         | 11 ++++
+ 30 files changed, 268 insertions(+), 15 deletions(-)
+ create mode 100644 gcc/config/i386/amxcomplexintrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 1f75ff1ca..39d3351db 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model,
+ 	{
+ 	  if (eax & bit_AMX_FP16)
+ 	    set_feature (FEATURE_AMX_FP16);
++	  if (edx & bit_AMX_COMPLEX)
++	    set_feature (FEATURE_AMX_COMPLEX);
+ 	}
+     }
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 1aa163463..87e8afe9b 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+ #define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
++#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \
++  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
+ #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
+ #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
+-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
++#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
++  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
+ #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
+@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+ #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
+ #define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
++#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mamx_complex:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 7b2d4d242..56020faac 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -243,6 +243,7 @@ enum processor_features
+   FEATURE_X86_64_V4,
+   FEATURE_AMX_FP16,
+   FEATURE_PREFETCHI,
++  FEATURE_AMX_COMPLEX,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 6caf06249..cbef68479 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
+   ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
++  ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX,
++			P_NONE, "-mamx-complex")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 9bad238e3..ca5c8f8a0 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+ 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+-		       amxfp16intrin.h prfchiintrin.h"
++		       amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h
+new file mode 100644
+index 000000000..6ea1eca04
+--- /dev/null
++++ b/gcc/config/i386/amxcomplexintrin.h
+@@ -0,0 +1,59 @@
++/* Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
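Note: what the two new tile instructions compute, in scalar form: each pair of FP16 lanes is one complex operand, and tcmmrlfp16ps/tcmmimfp16ps accumulate the real and imaginary parts of the products into an FP32 tile. A scalar model of one multiply-accumulate step (illustrative name, not from the patch):

  /* Scalar model of one AMX-COMPLEX multiply-accumulate lane.  */
  static void
  cmma_ref (float a_re, float a_im, float b_re, float b_im,
            float *acc_re, float *acc_im)
  {
    *acc_re += a_re * b_re - a_im * b_im;   /* tcmmrlfp16ps: real part */
    *acc_im += a_re * b_im + a_im * b_re;   /* tcmmimfp16ps: imaginary part */
  }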
_service:tar_scm:0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
Added
@@ -0,0 +1,30 @@
+From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 31 Mar 2023 10:49:14 +0800
+Subject: [PATCH 22/28] i386: Add AMX-COMPLEX to Granite Rapids
+
+gcc/ChangeLog:
+
+	* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
+
+(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
+---
+ gcc/config/i386/i386.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 75953defc..56d7794dc 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
+ constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
+-  | PTA_PREFETCHI;
++  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
+ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+ constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+--
+2.31.1
+
_service:tar_scm:0277-Initial-Granite-Rapids-D-Support.patch
Added
@@ -0,0 +1,212 @@
+From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
+From: "Mo, Zewei" <zewei.mo@intel.com>
+Date: Mon, 3 Jul 2023 11:00:26 +0800
+Subject: [PATCH 23/28] Initial Granite Rapids D Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h
+	(get_intel_cpu): Handle Granite Rapids D.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add graniterapids-d.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
+	* config.gcc: Add -march=graniterapids-d.
+	* config/i386/driver-i386.cc (host_detect_local_cpu):
+	Handle graniterapids-d.
+	* config/i386/i386.h: (PTA_GRANITERAPIDS_D): New.
+	* doc/extend.texi: Add graniterapids-d.
+	* doc/invoke.texi: Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/mv16.C: Add graniterapids-d.
+	* gcc.target/i386/funcspec-56.inc: Handle new march.
+
+(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
+---
+ gcc/common/config/i386/cpuinfo.h              |  9 ++++++++-
+ gcc/common/config/i386/i386-common.cc         |  2 ++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/config.gcc                                |  3 ++-
+ gcc/config/i386/driver-i386.cc                |  5 ++++-
+ gcc/config/i386/i386.h                        |  4 +++-
+ gcc/doc/extend.texi                           |  3 +++
+ gcc/doc/invoke.texi                           | 11 +++++++++++
+ gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
+ 10 files changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 39d3351db..1e53248ef 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+       break;
+     case 0xad:
+-    case 0xae:
+       /* Granite Rapids.  */
+       cpu = "graniterapids";
+       CHECK___builtin_cpu_is ("corei7");
+@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_type = INTEL_COREI7;
+       cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
+       break;
++    case 0xae:
++      /* Granite Rapids D.  */
++      cpu = "graniterapids-d";
++      CHECK___builtin_cpu_is ("corei7");
++      CHECK___builtin_cpu_is ("graniterapids-d");
++      cpu_model->__cpu_type = INTEL_COREI7;
++      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
++      break;
+     case 0x17:
+     case 0x1d:
+       /* Penryn.  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 87e8afe9b..28f468f48 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
+     M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
++  {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
++    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 56020faac..a32f32c97 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -93,6 +93,7 @@ enum processor_subtypes
+   INTEL_COREI7_ROCKETLAKE,
+   AMDFAM19H_ZNVER4,
+   INTEL_COREI7_GRANITERAPIDS,
++  INTEL_COREI7_GRANITERAPIDS_D,
+   CPU_SUBTYPE_MAX
+ };
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index ca5c8f8a0..3108ac4eb 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
+ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
+ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
+ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
+-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
++graniterapids-d native"
+
+ # Additional x86 processors supported by --with-cpu=.  Each processor
+ # MUST be separated by exactly one space.
+diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
+index ea8c3d8d1..e3bca4b49 100644
+--- a/gcc/config/i386/driver-i386.cc
++++ b/gcc/config/i386/driver-i386.cc
+@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
+ 	  /* This is unknown family 0x6 CPU.  */
+ 	  if (has_feature (FEATURE_AVX))
+ 	    {
++	      /* Assume Granite Rapids D.  */
++	      if (has_feature (FEATURE_AMX_COMPLEX))
++		cpu = "graniterapids-d";
+ 	      /* Assume Granite Rapids.  */
+-	      if (has_feature (FEATURE_AMX_FP16))
++	      else if (has_feature (FEATURE_AMX_FP16))
+ 		cpu = "graniterapids";
+ 	      /* Assume Tiger Lake */
+ 	      else if (has_feature (FEATURE_AVX512VP2INTERSECT))
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 56d7794dc..eda3e5e5b 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
+ constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
+-  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
++  | PTA_PREFETCHI;
++constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
++  | PTA_AMX_COMPLEX;
+ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+ constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index d7b0bc802..674db2f1a 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
+ @item graniterapids
+ Intel Core i7 graniterapids CPU.
+
++@item graniterapids-d
++Intel Core i7 graniterapids D CPU.
++
+ @item bonnell
+ Intel Atom Bonnell CPU.
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 186b33481..a2ec060fd 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+ SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+ AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
+
++@item graniterapids-d
++Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
++AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
++VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
++MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
++AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
++
+ @item k6
+ AMD K6 CPU with MMX instruction set support.
+
+diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
+index 65cc24f32..17b1fc722 100644
+--- a/gcc/testsuite/g++.target/i386/mv16.C
++++ b/gcc/testsuite/g++.target/i386/mv16.C
+@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
+   return 26;
+ }
+
++int __attribute__ ((target("arch=graniterapids-d"))) foo () {
++  return 28;
++}
++
+ int main ()
+ {
+   int val = foo ();
+@@ -136,6 +140,8 @@ int main ()
+     assert (val == 24);
+   else if (__builtin_cpu_is ("graniterapids"))
+     assert (val == 25);
++  else if (__builtin_cpu_is ("graniterapids-d"))
++    assert (val == 26);
+   else
+     assert (val == 0);
+
+diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+index 1a2f3b83d..f0f3397a7 100644
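Note: with the new subtype in place, user code can distinguish the D variant at run time (sketch; requires a compiler with this patch applied):

  int
  main (void)
  {
    __builtin_cpu_init ();
    if (__builtin_cpu_is ("graniterapids-d"))
      __builtin_printf ("Granite Rapids D\n");
    else if (__builtin_cpu_is ("graniterapids"))
      __builtin_printf ("Granite Rapids\n");
    return 0;
  }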
View file
_service:tar_scm:0278-Correct-Granite-Rapids-D-documentation.patch
Added
@@ -0,0 +1,48 @@
+From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Thu, 20 Jul 2023 10:47:18 +0800
+Subject: [PATCH 24/28] Correct Granite Rapids{, D} documentation
+
+gcc/ChangeLog:
+
+ * doc/invoke.texi: Remove AVX512VP2INTERSECT in
+ Granite Rapids{, D} from documentation.
+
+(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
+---
+ gcc/doc/invoke.texi | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index a2ec060fd..4d3eccdb2 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
+ AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
+ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
+ VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
+-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
++and PREFETCHI instruction set support.
+
+ @item graniterapids-d
+ Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
+@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
+ AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
+ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
+ VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
+-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
++PREFETCHI and AMX-COMPLEX instruction set support.
+
+ @item k6
+ AMD K6 CPU with MMX instruction set support.
+--
+2.31.1
+
View file
_service:tar_scm:0279-i386-Remove-Meteorlake-s-family_model.patch
Added
@@ -0,0 +1,30 @@
+From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001
+From: "Hu, Lin1" <lin1.hu@intel.com>
+Date: Thu, 15 Dec 2022 15:51:18 +0800
+Subject: [PATCH 25/28] i386: Remove Meteorlake's family_model
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
+ for meteorlake.
+
+(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 1e53248ef..348bc0c12 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ /* Alder Lake. */
+ case 0xb7:
+ /* Raptor Lake. */
+- case 0xb5:
+ case 0xaa:
+ case 0xac:
+ /* Meteor Lake. */
+--
+2.31.1
+
View file
_service:tar_scm:0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
Added
@@ -0,0 +1,33 @@
+From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Thu, 29 Jun 2023 03:10:35 +0000
+Subject: [PATCH 26/28] x86: Update model values for Alderlake, Rocketlake and
+ Raptorlake.
+
+Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
+ from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
+
+(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 348bc0c12..f9bcb6fad 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ case 0x9a:
+ /* Alder Lake. */
+ case 0xb7:
++ case 0xbf:
+ /* Raptor Lake. */
+ case 0xaa:
+ case 0xac:
+--
+2.31.1
+
View file
_service:tar_scm:0281-x86-Update-model-values-for-Raptorlake.patch
Added
@@ -0,0 +1,32 @@
+From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Mon, 14 Aug 2023 02:06:00 +0000
+Subject: [PATCH 27/28] x86: Update model values for Raptorlake.
+
+Update model values for Raptorlake according to SDM.
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
+ to Raptorlake.
+
+(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index f9bcb6fad..da1568fd1 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ case 0x9a:
+ /* Alder Lake. */
+ case 0xb7:
++ case 0xba:
+ case 0xbf:
+ /* Raptor Lake. */
+ case 0xaa:
+--
+2.31.1
+
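Taken together, patches 0279-0281 align get_intel_cpu with the SDM: model 0xb5 is dropped, and models 0xba/0xbf are classified as Raptor Lake, which shares its __cpu_type/__cpu_subtype with Alder Lake. A hedged C sketch (illustrative, not from the testsuite) of the user-visible effect:

#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();
  /* Raptor Lake and Meteor Lake report the Alder Lake subtype, so this
     is the check that succeeds on family-6 models 0xb7/0xba/0xbf.  */
  if (__builtin_cpu_is ("alderlake"))
    puts ("Alder Lake compatible core detected");
  return 0;
}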
View file
_service:tar_scm:0282-Fix-target_clone-arch-graniterapids-d.patch
Added
@@ -0,0 +1,159 @@
+From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Tue, 22 Aug 2023 18:18:31 +0800
+Subject: [PATCH 28/28] Fix target_clone ("arch=graniterapids-d")
+
+Both "graniterapids-d" and "graniterapids" are attached to
+PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
+different __cpu_subtype in get_intel_cpu.
+
+And get_builtin_code_for_version will try to match the first
+PROCESSOR_GRANITERAPIDS in processor_alias_table, which maps to
+"graniterapids" here.
+
+1861 else if (new_target->arch_specified && new_target->arch > 0)
+1862   for (i = 0; i < pta_size; i++)
+1863     if (processor_alias_table[i].processor == new_target->arch)
+1864       {
+1865         const pta *arch_info = &processor_alias_table[i];
+1866         switch (arch_info->priority)
+1867           {
+1868           default:
+1869             arg_str = arch_info->name;
+
+This mismatch makes dispatch_function_versions check the predicate
+of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
+the issue.
+The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
+
+"alderlake", "raptorlake" and "meteorlake" share the same isa, cost and
+tuning, and are mapped to the same __cpu_type/__cpu_subtype in
+get_intel_cpu, so there is no need to add PROCESSOR_RAPTORLAKE and others.
+
+gcc/ChangeLog:
+
+ * common/config/i386/i386-common.cc (processor_names): Add new
+ member graniterapids-d.
+ * config/i386/i386-options.cc (processor_alias_table): Update
+ table with PROCESSOR_GRANITERAPIDS_D.
+ (m_GRANITERAPIDS_D): New macro.
+ (m_CORE_AVX512): Add m_GRANITERAPIDS_D.
+ (processor_cost_table): Add icelake_cost for
+ PROCESSOR_GRANITERAPIDS_D.
+ * config/i386/i386.h (enum processor_type): Add new member
+ PROCESSOR_GRANITERAPIDS_D.
+ * config/i386/i386-c.cc (ix86_target_macros_internal): Handle
+ PROCESSOR_GRANITERAPIDS_D.
+---
+ gcc/common/config/i386/i386-common.cc | 6 ++++--
+ gcc/config/i386/i386-c.cc | 8 ++++++++
+ gcc/config/i386/i386-options.cc | 4 +++-
+ gcc/config/i386/i386.h | 3 ++-
+ 4 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 28f468f48..bec6801ce 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1873,6 +1873,7 @@ const char *const processor_names[] =
+ "alderlake",
+ "rocketlake",
+ "graniterapids",
++ "graniterapids-d",
+ "intel",
+ "geode",
+ "k6",
+@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] =
+ M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+ {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
+- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
++ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
++ P_PROC_AVX512F},
+ {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+ M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+ {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
+index 5e0ac278c..49f0db2b8 100644
+--- a/gcc/config/i386/i386-c.cc
++++ b/gcc/config/i386/i386-c.cc
+@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ def_or_undef (parse_in, "__graniterapids");
+ def_or_undef (parse_in, "__graniterapids__");
+ break;
++ case PROCESSOR_GRANITERAPIDS_D:
++ def_or_undef (parse_in, "__graniterapids_d");
++ def_or_undef (parse_in, "__graniterapids_d__");
++ break;
+ case PROCESSOR_ALDERLAKE:
+ def_or_undef (parse_in, "__alderlake");
+ def_or_undef (parse_in, "__alderlake__");
+@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ def_or_undef (parse_in, "__rocketlake");
+ def_or_undef (parse_in, "__rocketlake__");
+ break;
++
+ /* use PROCESSOR_max to not set/unset the arch macro. */
+ case PROCESSOR_max:
+ break;
+@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ case PROCESSOR_GRANITERAPIDS:
+ def_or_undef (parse_in, "__tune_graniterapids__");
+ break;
++ case PROCESSOR_GRANITERAPIDS_D:
++ def_or_undef (parse_in, "__tune_graniterapids_d__");
++ break;
+ case PROCESSOR_INTEL:
+ case PROCESSOR_GENERIC:
+ break;
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 7efd25084..86932d719 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see
+ #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
+ #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
+ #define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
++#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
+ #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
+- | m_ROCKETLAKE | m_GRANITERAPIDS)
++ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
+ #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
+ #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
+ #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
+@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] =
+ &alderlake_cost,
+ &icelake_cost,
+ &icelake_cost,
++ &icelake_cost,
+ &intel_cost,
+ &geode_cost,
+ &k6_cost,
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index eda3e5e5b..5052f878d 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
+ #define DEFAULT_LARGE_SECTION_THRESHOLD 65536
+ 
+ /* Which processor to tune code generation for. These must be in sync
+- with processor_target_table in i386.cc. */
++ with processor_cost_table in i386-options.cc. */
+
+ enum processor_type
+ {
+@@ -2251,6 +2251,7 @@ enum processor_type
+ PROCESSOR_ALDERLAKE,
+ PROCESSOR_ROCKETLAKE,
+ PROCESSOR_GRANITERAPIDS,
++ PROCESSOR_GRANITERAPIDS_D,
+ PROCESSOR_INTEL,
+ PROCESSOR_GEODE,
+ PROCESSOR_K6,
+--
+2.31.1
+
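A short C sketch of the scenario this patch fixes (illustrative; the function name is made up): with PROCESSOR_GRANITERAPIDS_D distinct, the generated resolver tests __builtin_cpu_is ("graniterapids-d") for the second clone instead of matching the plain "graniterapids" entry twice.

__attribute__ ((target_clones ("arch=graniterapids",
                               "arch=graniterapids-d", "default")))
int
compute (int x)
{
  /* Dispatched per-CPU at load time via the generated ifunc resolver.  */
  return x * 2;
}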
View file
_service:tar_scm:0283-i386-Change-prefetchi-output-template.patch
Added
@@ -0,0 +1,62 @@
+From 59e07c6c77dcc92d274ca6156b8916f66035dce8 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Mon, 22 Jul 2024 14:06:18 +0800
+Subject: [PATCH 1/2] i386: Change prefetchi output template
+
+For prefetchi instructions, a RIP-relative address is explicitly required
+for the operand, and the assembler obeys that rule strictly. This makes an
+instruction like:
+
+ prefetchit0 bar
+
+illegal for the assembler, although it should be a common usage of
+prefetchi.
+
+Change the output template to %a to explicitly add (%rip) after the
+function label, making it legal for the assembler so that the linker can
+resolve the real address.
+
+gcc/ChangeLog:
+
+ * config/i386/i386.md (prefetchi): Change to %a.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/prefetchi-1.c: Check (%rip).
+
+Reference:
+https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=
+062e46a813799684c6f900815fd22451d6294ae1
+---
+ gcc/config/i386/i386.md | 2 +-
+ gcc/testsuite/gcc.target/i386/prefetchi-1.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
+index f08c2cfb1..1b733008e 100644
+--- a/gcc/config/i386/i386.md
++++ b/gcc/config/i386/i386.md
+@@ -22917,7 +22917,7 @@
+ "TARGET_PREFETCHI && TARGET_64BIT"
+ {
+ static const char * const patterns[2] = {
+- "prefetchit1\t%0", "prefetchit0\t%0"
++ "prefetchit1\t%a0", "prefetchit0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1.c b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
+index 80f25e70e..03dfdc55e 100644
+--- a/gcc/testsuite/gcc.target/i386/prefetchi-1.c
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
+@@ -1,7 +1,7 @@
+ /* { dg-do compile { target { ! ia32 } } } */
+ /* { dg-options "-mprefetchi -O2" } */
+-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */
+-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+" 2 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 2 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 2 } } */
+
+ #include <x86intrin.h>
+
+--
+2.31.1
+
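Usage sketch (it mirrors the prefetchi-1b.c test added in the next patch): with the %a template, the compiler emits "prefetchit0 bar(%rip)", which the assembler accepts. Requires -mprefetchi on a 64-bit x86 target.

#include <x86intrin.h>

int bar (int a) { return a + 1; }

int
foo (int b)
{
  _m_prefetchit0 (bar);  /* code prefetch of bar with a T0 locality hint */
  return bar (b) + 1;
}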
View file
_service:tar_scm:0284-i386-Add-non-optimize-prefetchi-intrins.patch
Added
@@ -0,0 +1,92 @@
+From c19afda0ee549d294fd5714c63db24bcd4570d03 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Thu, 25 Jul 2024 16:16:05 +0800
+Subject: [PATCH 2/2] i386: Add non-optimize prefetchi intrins
+
+Under -O0, with the "newly" introduced intrins, the variable will be
+transformed to a mem instead of the original symbol_ref. The compiler will
+then treat the operand as invalid and turn the operation into a nop, which
+is not expected. Use a macro for the non-optimized case to keep the
+variable a symbol_ref, just as the prefetch intrinsic does.
+
+gcc/ChangeLog:
+
+ * config/i386/prfchiintrin.h
+ (_m_prefetchit0): Add macro for non-optimized option.
+ (_m_prefetchit1): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/prefetchi-1b.c: New test.
+
+Reference:
+https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=
+b4524c4430ba9771265bd9fc31e69a3f35dfe117
+---
+ gcc/config/i386/prfchiintrin.h | 9 +++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-1b.c | 26 ++++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+
+diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h
+index 06deef488..1e3d42dc3 100644
+--- a/gcc/config/i386/prfchiintrin.h
++++ b/gcc/config/i386/prfchiintrin.h
+@@ -30,6 +30,7 @@
+
+ #ifdef __x86_64__
+
++#ifdef __OPTIMIZE__
+ extern __inline void
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ _m_prefetchit0 (void* __P)
+@@ -43,6 +44,14 @@ _m_prefetchit1 (void* __P)
+ {
+ __builtin_ia32_prefetchi (__P, 2);
+ }
++#else
++#define _m_prefetchit0(P) \
++ __builtin_ia32_prefetchi(P, 3)
++
++#define _m_prefetchit1(P) \
++ __builtin_ia32_prefetchi(P, 2)
++
++#endif
+
+ #endif
+
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1b.c b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+new file mode 100644
+index 000000000..93139554d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-mprefetchi -O0" } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 1 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 1 } } */
++
++#include <x86intrin.h>
++
++int
++bar (int a)
++{
++ return a + 1;
++}
++
++int
++foo1 (int b)
++{
++ _m_prefetchit0 (bar);
++ return bar (b) + 1;
++}
++
++int
++foo2 (int b)
++{
++ _m_prefetchit1 (bar);
++ return bar (b) + 1;
++}
+--
+2.31.1
+
View file
_service:tar_scm:0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch
Added
@@ -0,0 +1,32 @@
+From 239f0637307ff2f6afb1473e99d0bb0eaf8946b2 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Fri, 23 Aug 2024 15:37:17 +0800
+Subject: [PATCH 154/157] [SME] Recover hip09 and hip11 in aarch64-cores.def
+
+---
+ gcc/config/aarch64/aarch64-cores.def | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index f069c81cf..3337fd1a0 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
+
+ /* HiSilicon ('H') cores. */
+ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
++AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0)
+
+ /* ARMv8.3-A Architecture Processors. */
+
+@@ -171,6 +172,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG,
+ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
+
+ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1)
++AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22, -1)
+
+ AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+ AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+--
+2.33.0
+
View file
_service:tar_scm:0286-Try-to-use-AI-model-to-guide-optimization.patch
Added
@@ -0,0 +1,671 @@ +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index fcfa54697..f42aeb8e8 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1449,6 +1449,7 @@ OBJS = \ + inchash.o \ + incpath.o \ + init-regs.o \ ++ ipa-hardware-detection.o \ + internal-fn.o \ + ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index fd98382fa..99e626641 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -185,6 +185,9 @@ const char *main_input_basename + Variable + int main_input_baselength + ++Variable ++bool optimize_maximum ++ + ; The base name used for auxiliary output files. + ; dump_base_name minus dump_base_ext. + +@@ -469,6 +472,10 @@ Ofast + Common Optimization + Optimize for speed disregarding exact standards compliance. + ++Om ++Common Optimization ++Optimize for maximizing radical optimization. ++ + Og + Common Optimization + Optimize for debugging experience rather than speed or size. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 309ecc3d9..ad853af9a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18637,6 +18637,134 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, + return stmt_cost; + } + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ ++static void ++override_C_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_ipa_reorder_fields = 1; ++ opts->x_flag_ipa_struct_reorg = 6; ++ opts->x_struct_layout_optimize_level = 6; ++ opts->x_flag_gnu89_inline = 1; ++ opts->x_flag_ccmp2 = 1; ++ opts->x_flag_array_widen_compare = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_flag_tree_slp_transpose_vectorize = 1; ++ opts->x_param_max_inline_insns_auto = 64; ++ opts->x_param_inline_unit_growth = 96; ++} ++ ++/* Check whether in CPP language or LTO with only CPP language. 
*/ ++static bool ++lang_cpp_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ if (!language_string) ++ { ++ return false; ++ } ++ if (lang_GNU_CXX ()) ++ { ++ return true; ++ } ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (lang_hooks.name, "GNU C++", 7)) ++ { ++ return false; ++ } ++ } ++ return true; ++ } ++ return false; ++} ++ ++static void ++override_CPP_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_finite_loops = 1; ++ opts->x_flag_omit_frame_pointer = 1; ++ opts->x_flag_sized_deallocation = 0; ++ opts->x_flag_loop_elim = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_param_early_inlining_insns = 256; ++ opts->x_param_max_inline_insns_auto = 128; ++ opts->x_param_inline_unit_growth = 256; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++static void ++override_optimize_options_1 (struct gcc_options *opts) ++{ ++ opts->x_flag_split_ldp_stp = 1; ++ opts->x_flag_if_conversion_gimple = 1; ++ opts->x_flag_ifcvt_allow_complicated_cmps = 1; ++ opts->x_param_ifcvt_allow_register_renaming = 2; ++ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; ++ opts->x_param_max_rtl_if_conversion_predictable_cost = 48; ++} ++ ++static void ++override_Fortran_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_unroll_loops = 1; ++ opts->x_flag_unconstrained_commons = 1; ++ opts->x_param_ipa_cp_eval_threshold = 1; ++ opts->x_param_ipa_cp_unit_growth = 80; ++ opts->x_param_ipa_cp_max_recursive_depth = 8; ++ opts->x_param_large_unit_insns = 30000; ++ opts->x_flag_ira_loop_pressure = 1; ++ opts->x_flag_inline_functions_called_once = 0; ++ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; ++ opts->x_flag_delayed_branch = 1; ++ opts->x_flag_gcse_las = 1; ++ opts->x_flag_gcse_sm = 1; ++ opts->x_flag_ipa_pta = 1; ++ opts->x_flag_reorder_blocks_and_partition = 1; ++ opts->x_flag_reorder_blocks = 1; ++ opts->x_flag_crypto_accel_aes = 1; ++ opts->x_param_flexible_seg_len = 1; ++} ++ ++/* Reset the optimize option. ++ After checking the model result, this function can ++ reset the more appropriate options. */ ++static void ++reset_machine_option (struct gcc_options *opts) ++{ ++ if (!(opts->x_optimize_maximum) ++ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL) ++ { ++ return; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ override_optimize_options_1 (opts); ++ if (lang_c_p ()) ++ { ++ override_C_optimize_options (opts); ++ } ++ else if (lang_cpp_p ()) ++ { ++ override_CPP_optimize_options (opts); ++ } ++ else if (lang_GNU_Fortran ()) ++ { ++ override_Fortran_optimize_options (opts); ++ } ++ } ++} ++ ++ + /* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND + and which when vectorized would operate on vector type VECTYPE. Add the + cost of any embedded operations. 
*/ +@@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts) + && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) + opts->x_flag_prefetch_loop_arrays = 1; + ++ reset_machine_option (opts); + aarch64_override_options_after_change_1 (opts); + } + +diff --git a/gcc/ipa-hardware-detection.cc b/gcc/ipa-hardware-detection.cc +new file mode 100644 +index 000000000..8085a8c65 +--- /dev/null ++++ b/gcc/ipa-hardware-detection.cc +@@ -0,0 +1,243 @@ ++/* Hardware Detection. ++ Copyright (C) 2024-2024 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
View file
_service:tar_scm:0287-Add-dynamic-memory-access-checks.patch
Added
@@ -0,0 +1,774 @@
+From 08fb60d0a0707af4004b20358f4a921e4ae6cca6 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Aug 2024 15:23:36 +0800
+Subject: [PATCH 156/157] Add dynamic memory access checks
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 622 +++++++++++++++++++++++++++++++++++++-------
+ gcc/params.opt | 4 +
+ 2 files changed, 525 insertions(+), 101 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index 94290ea9c..b000d4d75 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -368,6 +368,7 @@ typedef std::map<memref_t *, tree> memref_tree_map;
+ typedef std::set<gimple *> stmt_set;
+ typedef std::set<tree> tree_set;
+ typedef std::map<tree, tree> tree_map;
++typedef std::map<tree, poly_offset_int> tree_poly_offset_map;
+
+ tree_memref_map *tm_map;
+ funct_mrs_map *fmrs_map;
+@@ -710,6 +711,20 @@ get_mem_ref_address_ssa_name (tree mem, tree base)
+ return NULL_TREE;
+ }
+
++static void
++dump_base_addr (tree base_addr)
++{
++ if (base_addr)
++ {
++ fprintf (dump_file, "Base addr (%s): ",
++ get_tree_code_name (TREE_CODE (base_addr)));
++ print_generic_expr (dump_file, base_addr);
++ }
++ else
++ fprintf (dump_file, "Base addr (%s): ", "null");
++ fprintf (dump_file, "\n");
++}
++
+ static void
+ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr)
+ {
+@@ -736,14 +751,7 @@ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr)
+ {
+ tree base_addr = get_mem_ref_address_ssa_name (mem, base);
+ if (dump_file)
+- {
+- fprintf (dump_file, "Base addr (%s): ",
+- base_addr ? get_tree_code_name (TREE_CODE (base_addr))
+- : "null");
+- if (base_addr)
+- print_generic_expr (dump_file, base_addr);
+- fprintf (dump_file, "\n");
+- }
++ dump_base_addr (base_addr);
+ if (base_addr)
+ {
+ mr->base = analyse_addr_eval (base_addr, mr);
+@@ -1187,7 +1195,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
+ }
+
+ static void
+-find_nearest_common_dominator (memref_t *mr, basic_block &dom)
++find_nearest_common_post_dominator (memref_t *mr, basic_block &dom)
+ {
+ for (unsigned int i = 0; i < mr->stmts.length (); i++)
+ {
+@@ -1196,7 +1204,7 @@ find_nearest_common_dominator (memref_t *mr, basic_block &dom)
+ if (dom == bb)
+ continue;
+ if (dom)
+- dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb);
++ dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, bb);
+ else
+ dom = bb;
+ }
+@@ -1495,10 +1503,13 @@ gimple_copy_and_remap (gimple *stmt)
+
+ static gimple *
+ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+- int last_idx, stmt_set &processed)
++ int first_idx, int last_idx,
++ stmt_set &processed)
+ {
+ gimple *last_stmt = NULL;
+- for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
++ if (first_idx == 0)
++ first_idx = mr->stmts.length () - 1;
++ for (int i = first_idx; i >= last_idx; i--)
+ {
+ if (processed.count (mr->stmts[i]))
+ continue;
+@@ -1515,6 +1526,436 @@ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+ return last_stmt;
+ }
+
++/* Check if prefetch insertion may be always unsafe in this case. For now
++ reject cases with access to arrays with no domain or with no elements. */
++
++static bool
++check_prefetch_safety (vec<memref_t *> &mrs, memref_t *cmr)
++{
++ for (unsigned int i = 0; i < mrs.length (); i++)
++ {
++ memref_t *mr = mrs[i];
++ if (mr == cmr || mr->used_mrs.empty ())
++ continue;
++ bool is_store;
++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store);
++ if (mem == NULL || TREE_CODE (*mem) != ARRAY_REF)
++ continue;
++ tree array = TREE_OPERAND (*mem, 0);
++ tree atype = TREE_TYPE (array);
++ gcc_assert (atype);
++ tree domain = TYPE_DOMAIN (atype);
++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain))
++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain)))
++ {
++ if (dump_file)
++ {
++ fprintf (dump_file, "Unsupported array type: ");
++ print_generic_expr (dump_file, atype);
++ fprintf (dump_file, "\n");
++ }
++ return false;
++ }
++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain));
++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain));
++ if (min_val == 0 && max_val == 0)
++ {
++ if (dump_file)
++ {
++ fprintf (dump_file, "Unsupported array type's bounds: ");
++ print_generic_expr (dump_file, atype);
++ fprintf (dump_file, "\n");
++ }
++ return false;
++ }
++ }
++ return true;
++}
++
++/* Collect base addresses which we need to check. */
++
++static void
++collect_base_addresses (vec<memref_t *> &used_mr_vec, HOST_WIDE_INT dist_val,
++ memref_t *comp_mr, tree_poly_offset_map &offset_map)
++{
++ if (dump_file)
++ fprintf (dump_file, "Collect base addresses which we need to check.\n");
++ for (unsigned int i = 0; i < used_mr_vec.length (); i++)
++ {
++ memref_t *mr = used_mr_vec[i];
++ if (mr == comp_mr || mr->used_mrs.empty ())
++ continue;
++ bool is_store;
++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store);
++ if (mem == NULL || TREE_CODE (*mem) != MEM_REF)
++ continue;
++ tree base = get_base_address (*mem);
++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base);
++ if (!base_addr)
++ continue;
++ if (dump_file)
++ {
++ dump_base_addr (base_addr);
++ if (base)
++ {
++ fprintf (dump_file, "Base:");
++ print_generic_expr (dump_file, base);
++ fprintf (dump_file, "\n");
++ }
++ }
++ if (!TREE_OPERAND (base, 1))
++ continue;
++ poly_offset_int curr_offset = mem_ref_offset (base);
++ poly_offset_int saved_offset = 0;
++ if (offset_map.count (base_addr))
++ {
++ saved_offset = offset_map[base_addr];
++ if ((dist_val > 0 && known_gt (curr_offset, saved_offset))
++ || (dist_val < 0 && known_lt (curr_offset, saved_offset)))
++ offset_map[base_addr] = curr_offset;
++ else if (dump_file)
++ fprintf (dump_file, "Off: step=%ld gt=%d lt=%d\n", dist_val,
++ known_gt (curr_offset, saved_offset),
++ known_lt (curr_offset, saved_offset));
++ }
++ else
++ offset_map[base_addr] = curr_offset;
++ }
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ {
++ fprintf (dump_file, "Final list of base addresses:\n");
++ for (tree_poly_offset_map::iterator it1 = offset_map.begin ();
++ it1 != offset_map.end (); ++it1)
++ {
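The diff above is truncated by the viewer. For orientation, a standalone C sketch (an assumption: simplified from the pmask logic in insert_page_check, not compiler code) of the dynamic check being inserted: a prefetch at addr + dist is only considered safe here when it stays on the same page as addr, with the page size a power of two.

#include <stdint.h>

static inline int
same_page_p (uintptr_t addr, uintptr_t dist, uintptr_t pagesize)
{
  uintptr_t pmask = ~(pagesize - 1);   /* mirrors pmask in the patch */
  return (addr & pmask) == ((addr + dist) & pmask);
}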
View file
_service:tar_scm:0288-Enable-macro-use-commandline.patch
Added
@@ -0,0 +1,207 @@
+From 7a578a8725f8fd7d92fcbbac14841ea7e8d0870f Mon Sep 17 00:00:00 2001
+From: zhangxiaohua <xiaohua20100827@163.com>
+Date: Sun, 25 Aug 2024 23:08:53 +0800
+Subject: [PATCH 157/157] Enable macro-use-commandline
+
+Signed-off-by: zhangxiaohua <xiaohua20100827@163.com>
+---
+ gcc/c-family/c-opts.cc | 4 +++
+ gcc/c-family/c.opt | 4 +++
+ gcc/doc/cppopts.texi | 4 +++
+ gcc/doc/invoke.texi | 1 +
+ .../gcc.dg/cpp/macro-use-cmdline-1.c | 26 ++++++++++++++
+ .../gcc.dg/cpp/macro-use-cmdline-2.c | 34 +++++++++++++++++++
+ libcpp/include/cpplib.h | 3 ++
+ libcpp/init.cc | 1 +
+ libcpp/macro.cc | 16 ++++++++-
+ 9 files changed, 92 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+
+diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
+index 5134f6128..744b54dc3 100644
+--- a/gcc/c-family/c-opts.cc
++++ b/gcc/c-family/c-opts.cc
+@@ -527,6 +527,10 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
+ cpp_opts->track_macro_expansion = 2;
+ break;
+
++ case OPT_fmacro_use_commandline:
++ cpp_opts->macro_use_commandline = 1;
++ break;
++
+ case OPT_fexec_charset_:
+ cpp_opts->narrow_charset = arg;
+ break;
+diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
+index 07da40ef4..a36c27f07 100644
+--- a/gcc/c-family/c.opt
++++ b/gcc/c-family/c.opt
+@@ -2012,6 +2012,10 @@ ftrack-macro-expansion=
+ C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
+ -ftrack-macro-expansion=<0|1|2> Track locations of tokens coming from macro expansion and display them in error messages.
+
++fmacro-use-commandline
++C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
++Preferentially use options from the commandline.
++
+ fpretty-templates
+ C++ ObjC++ Var(flag_pretty_templates) Init(1)
+ Do not pretty-print template specializations as the template signature followed by the arguments.
+diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
+index c0a92b370..8c8a81eac 100644
+--- a/gcc/doc/cppopts.texi
++++ b/gcc/doc/cppopts.texi
+@@ -277,6 +277,10 @@ correct column numbers in warnings or errors, even if tabs appear on the
+ line. If the value is less than 1 or greater than 100, the option is
+ ignored. The default is 8.
+
++@item -fmacro-use-commandline
++@opindex fmacro-use-commandline
++Preferentially use options from the command line.
++
+ @item -ftrack-macro-expansion@r{[}=@var{level}@r{]}
+ @opindex ftrack-macro-expansion
+ Track locations of tokens across macro expansions. This allows the
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index bdd8b9429..2ff7d860d 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -630,6 +630,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fexec-charset=@var{charset} -fextended-identifiers @gol
+ -finput-charset=@var{charset} -flarge-source-files @gol
+ -fmacro-prefix-map=@var{old}=@var{new} -fmax-include-depth=@var{depth} @gol
++-fmacro-use-commandline @gol
+ -fno-canonical-system-headers -fpch-deps -fpch-preprocess @gol
+ -fpreprocessed -ftabstop=@var{width} -ftrack-macro-expansion @gol
+ -fwide-exec-charset=@var{charset} -fworking-directory @gol
+diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+new file mode 100644
+index 000000000..f85d9c268
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+@@ -0,0 +1,26 @@
++/*
++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1 -DTEST_MACRO=20" }
++ { dg-do compile }
++ { dg-do run }
++*/
++
++/* { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 0 }
++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 }
++*/
++
++#if DEBUG
++extern int puts (const char *);
++#else
++#define puts(X)
++#endif
++extern void abort (void);
++
++#define err(str) do { puts(str); abort(); } while (0)
++
++int main (int argc, char *argv[])
++{
++ int macroValue = TEST_MACRO;
++ if (macroValue != 20)
++ err("macroValue");
++ return 0;
++}
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+new file mode 100644
+index 000000000..99d92d1e4
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+@@ -0,0 +1,34 @@
++/*
++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1" }
++ { dg-do compile }
++ { dg-do run }
++*/
++
++#define TEST_MACRO 300
++#define TEST_MACRO_1 400
++/*
++ { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 7 }
++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 }
++*/
++
++#if DEBUG
++extern int puts (const char *);
++#else
++#define puts(X)
++#endif
++
++extern void abort (void);
++
++#define err(str) do { puts(str); abort(); } while (0)
++
++int main (int argc, char *argv[])
++{
++ int macroValue = TEST_MACRO;
++ if (macroValue != 1)
++ err("macroValue");
++
++ int macroValue1 = TEST_MACRO_1;
++ if (macroValue1 != 400)
++ err("macroValue1");
++ return 0;
++}
+\ No newline at end of file
+diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
+index 3eba6f74b..c6101ca01 100644
+--- a/libcpp/include/cpplib.h
++++ b/libcpp/include/cpplib.h
+@@ -471,6 +471,9 @@ struct cpp_options
+ consumes the highest amount of memory. */
+ unsigned char track_macro_expansion;
+
++ /* Use the options on the command line first. */
++ unsigned char macro_use_commandline;
++
+ /* Nonzero means handle C++ alternate operator names. */
+ unsigned char operator_names;
+
+diff --git a/libcpp/init.cc b/libcpp/init.cc
+index f4ab83d21..47be60a36 100644
+--- a/libcpp/init.cc
++++ b/libcpp/init.cc
+@@ -215,6 +215,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
+ cpp_options::track_macro_expansion to learn about the other
+ values. 
*/ + CPP_OPTION (pfile, track_macro_expansion) = 2; ++ CPP_OPTION (pfile, macro_use_commandline) = 0; + CPP_OPTION (pfile, warn_normalize) = normalized_C; + CPP_OPTION (pfile, warn_literal_suffix) = 1; + CPP_OPTION (pfile, canonical_system_headers) +diff --git a/libcpp/macro.cc b/libcpp/macro.cc +index 8ebf360c0..aa9e4ffa6 100644 +--- a/libcpp/macro.cc ++++ b/libcpp/macro.cc +@@ -3852,7 +3852,21 @@ _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) + node->value.macro->line, 0, + "this is the location of the previous definition"); + } +- _cpp_free_definition (node); ++#define LOCATION_FROM_LINEMAP 0 ++#define MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN 96 ++#define MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN 128 ++ if (CPP_OPTION (pfile, macro_use_commandline) ++ && node->value.macro->line >= MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && node->value.macro->line <= MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && pfile->forced_token_location == LOCATION_FROM_LINEMAP) ++ { ++ cpp_pedwarning_with_line (pfile, CPP_W_NONE, ++ node->value.macro->line, 0, ++ "use the previous definition from commandline"); ++ return false; ++ }
View file
_service:tar_scm:0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch
Added
@@ -0,0 +1,35 @@
+From 63f99f46e851aecc070496a0e688a0d118c820a4 Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Mon, 2 Sep 2024 17:57:52 +0800
+Subject: [PATCH] tree-ssa-loop-crc.cc: TARGET_CRC32 may be not defined
+
+TARGET_CRC32 may be not defined on some architectures, RISC-V is one example.
+---
+ gcc/tree-ssa-loop-crc.cc | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc
+index b9c2f71ca..7eee9446d 100644
+--- a/gcc/tree-ssa-loop-crc.cc
++++ b/gcc/tree-ssa-loop-crc.cc
+@@ -1227,6 +1227,9 @@ convert_to_new_loop (class loop *loop)
+ static unsigned int
+ tree_ssa_loop_crc ()
+ {
++#ifndef TARGET_CRC32
++ return 0;
++#else
+ if (TARGET_CRC32 == false)
+ {
+ warning (OPT____,"The loop-crc optimization is not working." \
+@@ -1269,6 +1272,7 @@ tree_ssa_loop_crc ()
+ }
+ }
+ return todo;
++#endif
+ }
+
+ /* Loop crc. */
+--
+2.33.0
+
View file
_service:tar_scm:0290-Add-ipa-prefetch-test-for-gcc-s-case.patch
Added
@@ -0,0 +1,209 @@
+From 0534ae05fc313c0d449b48ffe3e01642b644e6d2 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilial@huawei-partners.com>
+Date: Fri, 6 Sep 2024 10:40:50 +0800
+Subject: [PATCH 1/2] Add ipa-prefetch test for gcc's case
+
+---
+ gcc/ipa-prefetch.cc | 4 +-
+ gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c | 167 ++++++++++++++++++++
+ 2 files changed, 170 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..8e628390b 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -1668,6 +1668,8 @@ static gimple *
+ insert_page_check (tree addr, tree_poly_offset_map &offset_map,
+ gimple_seq &stmts)
+ {
++ if (dump_file)
++ fprintf (dump_file, "Insert page check.\n");
+ poly_offset_int offset = 0;
+ if (offset_map.count (addr))
+ offset = offset_map[addr];
+@@ -1783,7 +1785,7 @@ static gimple *
+ insert_index_check (tree mem, gimple_seq &stmts)
+ {
+ if (dump_file)
+- fprintf (dump_file, "Insert array index check\n");
++ fprintf (dump_file, "Insert array index check.\n");
+ tree atype = TREE_TYPE (TREE_OPERAND (mem, 0));
+ tree ind = TREE_OPERAND (mem, 1);
+ if (decl_map->count (ind))
+diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+new file mode 100644
+index 000000000..f1001c350
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+@@ -0,0 +1,167 @@
++/* { dg-do link } */
++/* { dg-options "-O3 -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch" } */
++/* { dg-require-effective-target lto } */
++
++/* Based on opensource gcc code. */
++
++#include <stdbool.h>
++#include <stdlib.h>
++#include <stddef.h>
++
++#define SPARSESET_ELT_TYPE unsigned int
++#define ALLOCNO_NUM(A) ((A)->num)
++
++typedef struct sparseset_def
++{
++ SPARSESET_ELT_TYPE *dense; /* Dense array. */
++ SPARSESET_ELT_TYPE *sparse; /* Sparse array. */
++ SPARSESET_ELT_TYPE members; /* Number of elements. */
++ SPARSESET_ELT_TYPE size; /* Maximum number of elements. */
++ SPARSESET_ELT_TYPE iter; /* Iterator index. */
++ unsigned char iter_inc; /* Iteration increment amount. */
++ bool iterating;
++ SPARSESET_ELT_TYPE elms[2]; /* Combined dense and sparse arrays. */
++} *sparseset;
++
++struct ira_allocno
++{
++ /* The allocno order number starting with 0. Each allocno has an
++ unique number and the number is never changed for the
++ allocno. */
++ int num;
++ /* Regno for allocno or cap. */
++ int regno;
++ /*...*/
++};
++
++typedef struct ira_allocno_live_range *allocno_live_range_t;
++typedef struct ira_allocno *ira_allocno_t;
++
++struct ira_allocno_live_range
++{
++ /* Allocno whose live range is described by given structure. */
++ ira_allocno_t allocno;
++ /* Program point range. */
++ int start, finish;
++ /* Next structure describing program points where the allocno
++ lives. */
++ allocno_live_range_t next;
++ /* Pointer to structures with the same start/finish. */
++ allocno_live_range_t start_next, finish_next;
++};
++
++bool
++sparseset_bit_p (sparseset s, SPARSESET_ELT_TYPE e)
++{
++ SPARSESET_ELT_TYPE idx;
++
++ idx = s->sparse[e];
++
++ return idx < s->members && s->dense[idx] == e;
++}
++
++bool new_pseudos_p;
++int ira_max_point, ira_allocnos_num;
++allocno_live_range_t *ira_finish_point_ranges;
++
++static inline void
++sparseset_clear (sparseset s)
++{
++ s->members = 0;
++ s->iterating = false;
++}
++
++sparseset
++sparseset_alloc (SPARSESET_ELT_TYPE n_elms)
++{
++ unsigned int n_bytes = sizeof (struct sparseset_def)
++ + ((n_elms - 1) * 2 * sizeof (SPARSESET_ELT_TYPE));
++
++ /* We use xcalloc rather than xmalloc to silence some valgrind uninitialized
++ read errors when accessing set->sparse[n] when "n" is not, and never has
++ been, in the set. These uninitialized reads are expected, by design and
++ harmless. If this turns into a performance problem due to some future
++ additional users of sparseset, we can revisit this decision. */
++ sparseset set = (sparseset) calloc (1, n_bytes);
++ set->dense = &(set->elms[0]);
++ set->sparse = &(set->elms[n_elms]);
++ set->size = n_elms;
++ sparseset_clear (set);
++ return set;
++}
++
++void
++sparseset_insert_bit (sparseset s, SPARSESET_ELT_TYPE e, SPARSESET_ELT_TYPE idx)
++{
++ s->sparse[e] = idx;
++ s->dense[idx] = e;
++}
++
++void
++sparseset_swap (sparseset s, SPARSESET_ELT_TYPE idx1, SPARSESET_ELT_TYPE idx2)
++{
++ SPARSESET_ELT_TYPE tmp = s->dense[idx2];
++ sparseset_insert_bit (s, s->dense[idx1], idx2);
++ sparseset_insert_bit (s, tmp, idx1);
++}
++
++void __attribute__ ((noinline))
++sparseset_clear_bit (sparseset s, SPARSESET_ELT_TYPE e)
++{
++ if (sparseset_bit_p (s, e))
++ {
++ SPARSESET_ELT_TYPE idx = s->sparse[e];
++ SPARSESET_ELT_TYPE iter = s->iter;
++ SPARSESET_ELT_TYPE mem = s->members - 1;
++
++ /* If we are iterating over this set and we want to delete a
++ member we've already visited, then we swap the element we
++ want to delete with the element at the current iteration
++ index so that it plays well together with the code below
++ that actually removes the element. */
++ if (s->iterating && idx <= iter)
++ {
++ if (idx < iter)
++ {
++ sparseset_swap (s, idx, iter);
++ idx = iter;
++ }
++ s->iter_inc = 0;
++ }
++
++ /* Replace the element we want to delete with the last element
++ in the dense array and then decrement s->members, effectively
++ removing the element we want to delete. */
++ sparseset_insert_bit (s, s->dense[mem], idx);
++ s->members = mem;
++ }
++}
++
++allocno_live_range_t r;
++sparseset allocnos_live;
++
++void
++ira_flattening ()
++{
++ int i;
++
++ if (new_pseudos_p)
++ {
++ allocnos_live = sparseset_alloc (ira_allocnos_num);
++ for (i = 0; i < ira_max_point; i++)
++ {
++ for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next)
++ sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno));
++ }
++ }
++}
++
++int main()
++{
View file
_service:tar_scm:0291-Fix-settings-for-wide-operations-tests.patch
Added
@@ -0,0 +1,73 @@
+From 411792b0bbb63715d8e90d46eb4f0d9c810ce8ba Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Tue, 3 Sep 2024 21:26:03 +0800
+Subject: [PATCH 2/2] Fix settings for wide operations tests
+
+Signed-off-by: lin-houzhong <hz_lin8@163.com>
+---
+ gcc/testsuite/gcc.dg/double_sized_mul-1.c | 8 +++++---
+ gcc/testsuite/gcc.dg/double_sized_mul-2.c | 9 +++++----
+ gcc/testsuite/gcc.dg/uaddsub.c | 6 ++++--
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+index d32a25223..b848e02de 100644
+--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
++++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+@@ -1,7 +1,8 @@
+-/* { dg-do compile } */
++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */
+ /* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
+ proper overflow detection in some cases. */
+-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+@@ -138,4 +139,5 @@ uint128_t mul128_perm (uint64_t a, uint64_t b)
+ return res;
+ }
+
+-/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */
++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" { target aarch64*-*-* } } } */
++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 4 "widening_mul" { target x86_64*-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+index ff35902b7..cf8f0aedd 100644
+--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
++++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+@@ -1,7 +1,8 @@
+-/* { dg-do compile } */
+-/* fif-conversion-gimple is required for proper overflow detection
+- in some cases. */
+-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */
++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
++ proper overflow detection in some cases. */
++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
+index 96c26d308..dcb587fc8 100644
+--- a/gcc/testsuite/gcc.dg/uaddsub.c
++++ b/gcc/testsuite/gcc.dg/uaddsub.c
+@@ -1,5 +1,6 @@
+-/* { dg-do compile } */
++/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */
+ /* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+@@ -140,4 +141,5 @@ uint256_t sub256 (uint128_t a, uint128_t b)
+ }
+
+ /* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+-/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" { target aarch64*-*-* } } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 4 "optimized" { target x86_64*-*-* } } } */
+--
+2.33.0
+
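For context, a standalone C sketch of the source pattern the double_sized_mul tests exercise (an assumption based on the test names, not copied from the testsuite): a 64x64->128-bit multiply spelled out with 32-bit halves, which the widening_mul pass is expected to recognize and collapse into one double-sized multiply.

#include <stdint.h>

typedef unsigned __int128 uint128_t;

uint128_t
mul128_decomposed (uint64_t a, uint64_t b)
{
  uint64_t a_lo = (uint32_t) a, a_hi = a >> 32;
  uint64_t b_lo = (uint32_t) b, b_hi = b >> 32;
  uint64_t lo_lo = a_lo * b_lo;
  uint64_t hi_lo = a_hi * b_lo;
  uint64_t lo_hi = a_lo * b_hi;
  uint64_t hi_hi = a_hi * b_hi;
  /* Propagate carries between the four partial products.  */
  uint64_t cross = (lo_lo >> 32) + (uint32_t) hi_lo + lo_hi;
  uint64_t high = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  uint64_t low = (cross << 32) | (uint32_t) lo_lo;
  return ((uint128_t) high << 64) | low;
}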
View file
_service:tar_scm:0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch
Added
@@ -0,0 +1,42 @@
+From 808294bf0f32aaff1cc7e56a756b246d328b3402 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 6 Sep 2024 11:10:03 +0800
+Subject: [PATCH 2/3] Fix errors in ipa-prefetch (IAORPF and IAOSJ0)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..74af55af0 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -1681,7 +1681,8 @@ insert_page_check (tree addr, tree_poly_offset_map &offset_map,
+ unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1);
+ tree pmask_cst = build_int_cst (utype, pmask);
+ tree off_tree = wide_int_to_tree (sizetype, offset);
+- gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE);
++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE
++ || TREE_CODE (addr_type) == REFERENCE_TYPE);
+ tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR,
+ addr_type, addr, off_tree);
+ tree conv_addr = make_ssa_name (utype);
+@@ -2082,11 +2083,11 @@ optimize_function (cgraph_node *n, function *fn)
+ for (unsigned int i = 0; i < vmrs.length (); i++)
+ find_nearest_common_post_dominator (vmrs[i], dom_bb);
+
+- if (!dom_bb)
++ if (!dom_bb || dom_bb->index == ENTRY_BLOCK || dom_bb->index == EXIT_BLOCK)
+ {
+ if (dump_file)
+- fprintf (dump_file, "Post dominator bb for MRs is not found. "
+- "Skip the case.\n");
++ fprintf (dump_file, "Post dominator bb for MRs is not found or "
++ "it's an entry/exit block. Skip the case.\n");
+ return 0;
+ }
+ else if (dump_file)
+--
+2.33.0
+
View file
_service:tar_scm:0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch
Added
@@ -0,0 +1,51 @@
+From bfb77997f423ffe3bdcbd8bb8d7f739fe51ce4f5 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 6 Sep 2024 11:36:11 +0800
+Subject: [PATCH 3/3] Fix error with stmts insertion in ipa-prefetch (for
+ IAO6R3)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..6190c2ebb 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -2096,7 +2096,7 @@ optimize_function (cgraph_node *n, function *fn)
+ fprintf (dump_file, "\n");
+ }
+
+- /* Try to find comp_mr's stmt in the dominator bb. */
++ /* Try to find comp_mr's stmt in the post dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+ gsi_prev (&si))
+@@ -2168,7 +2168,22 @@ optimize_function (cgraph_node *n, function *fn)
+ vec<gimple *> pcalls = vNULL;
+ gimple *last_pref = NULL;
+ insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts);
+- gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++
++ gimple *gstmt = gsi_stmt (gsi);
++ bool insert_after = last_used || gstmt == NULL || !is_ctrl_stmt (gstmt);
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ {
++ fprintf (dump_file, "Insert prefetch sequence %s stmt:\n",
++ insert_after ? "after": "before");
++ if (gstmt)
++ print_gimple_stmt (dump_file, gstmt, 0);
++ else
++ fprintf (dump_file, "(no stmts)\n");
++ }
++ if (insert_after)
++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++ else
++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
+
+ correct_cfg (bbends, last_pref, dom_bb);
+
+--
+2.33.0
+
View file
_service:tar_scm:0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch
Added
@@ -0,0 +1,80 @@
+From cd79fc29d2cdb73836f8699355113e94b833e0e0 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Wed, 11 Sep 2024 17:18:58 +0800
+Subject: [PATCH 2/2] Fix errors in ipa-prefetch (IAO50J and IAO5H7)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 35 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index 5184687aa..685f9c267 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -2099,6 +2099,18 @@ optimize_function (cgraph_node *n, function *fn)
+ fprintf (dump_file, "\n");
+ }
+
++ /* Check that all used mrs dominate found post dominator bb. This case
++ may be supported later by copying MR evaluation to the bb. */
++ for (unsigned int i = 0; i < used_mr_vec.length (); i++)
++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb,
++ gimple_bb (used_mr_vec[i]->stmts[0])))
++ {
++ if (dump_file)
++ fprintf (dump_file, "MR's (%d) bb is not dominate the found bb %d. "
++ "Skip the case.\n", used_mr_vec[i]->mr_id, dom_bb->index);
++ return 0;
++ }
++
+ /* Try to find comp_mr's stmt in the post dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+@@ -2133,17 +2145,29 @@ optimize_function (cgraph_node *n, function *fn)
+
+ /* Create new inc var. Insert new_var = old_var + step * factor. */
+ decl_map = new tree_map;
+- gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
+- tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
++ gimple *old_inc_stmt = comp_mr->stmts[0];
++ gcc_assert (old_inc_stmt && gimple_assign_single_p (old_inc_stmt));
++ tree inc_var = gimple_assign_lhs (old_inc_stmt);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Old inc stmt: ");
++ print_gimple_stmt (dump_file, old_inc_stmt, 0);
++ }
+ /* If old_var definition dominates the current use, just use it, otherwise
+ evaluate it just before new inc var evaluation. */
+ gimple_seq stmts = NULL;
+ stmt_set processed_stmts;
+- if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
++ tree local_inc_var = inc_var;
++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (old_inc_stmt)))
+ {
+ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0,
+ processed_stmts);
+- inc_var = gimple_assign_lhs (tmp);
++ local_inc_var = gimple_assign_lhs (tmp);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Localized old inc stmt: ");
++ print_gimple_stmt (dump_file, tmp, 0);
++ }
+ }
+ tree var_type = TREE_TYPE (inc_var);
+ enum tree_code inc_code;
+@@ -2155,7 +2179,8 @@ optimize_function (cgraph_node *n, function *fn)
+ HOST_WIDE_INT dist_val = tree_to_shwi (step)
+ * param_ipa_prefetch_distance_factor;
+ tree dist = build_int_cst (TREE_TYPE (step), dist_val);
+- tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
++ tree new_inc_var = gimple_build (&stmts, inc_code, var_type, local_inc_var,
++ dist);
+ (*decl_map)[inc_var] = new_inc_var;
+ if (dump_file)
+ {
+--
+2.33.0
+
View file
_service:tar_scm:0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch
Added
@@ -0,0 +1,30 @@
+From 7b4cce4896cefefedba9545a9633585e086b7621 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Wed, 11 Sep 2024 18:26:22 +0800
+Subject: [PATCH 1/2] Fix error with grouped_load merge in
+ slp-transpose-vectorize (for IALR8B)
+
+---
+ gcc/tree-vect-slp.cc | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index e3e246977..d4870de43 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3807,7 +3807,11 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+ these two grouped loads need to be merged. */
+ tree opb = get_op_base_address (first_element);
+ unsigned int grp_size_b = DR_GROUP_SIZE (first_element);
+- if (opa == opb && grp_size_a == grp_size_b)
++ /* Ensure that the elements merge to load group meet the alignment condition (dr_misalignment) */
++ HOST_WIDE_INT diff = 0;
++ diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr))
++ - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr)));
++ if (opa == opb && grp_size_a == grp_size_b && diff >= 0)
+ {
+ res.safe_push (first_element);
+ visited[i] = true;
+--
+2.33.0
+
View file
_service:tar_scm:0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch
Added
@@ -0,0 +1,28 @@
+From b3a6a170bf1dc0e460e98a7fd02c92e6b036784a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Fri, 13 Sep 2024 14:13:07 +0800
+Subject: [PATCH 2/2] Fix error in slp-transpose-vectorize (for IAQFM3)
+
+---
+ gcc/tree-vect-slp.cc | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d4870de43..d7e198dff 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3811,7 +3811,10 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+       HOST_WIDE_INT diff = 0;
+       diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr))
+               - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr)));
+-      if (opa == opb && grp_size_a == grp_size_b && diff >= 0)
++      if (opa == opb
++          && grp_size_a == grp_size_b
++          && diff >= 0
++          && check_same_bb (first_element, merge_first_element))
+         {
+           res.safe_push (first_element);
+           visited[i] = true;
+--
+2.33.0
+
View file
_service:tar_scm:0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch
Added
@@ -0,0 +1,26 @@
+From 8b30d71f881e15bfbc514f9b65fee178610e1536 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Wed, 18 Sep 2024 10:48:55 +0800
+Subject: [PATCH] Fix error in slp-transpose-vectorize (for IARHFM)
+
+---
+ gcc/tree-vect-slp.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d7e198dff..fbd638333 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3814,7 +3814,8 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+       if (opa == opb
+           && grp_size_a == grp_size_b
+           && diff >= 0
+-          && check_same_bb (first_element, merge_first_element))
++          && check_same_bb (first_element, merge_first_element)
++          && DR_PTR_INFO (first_element->dr_aux.dr) != DR_PTR_INFO (merge_first_element->dr_aux.dr))
+         {
+           res.safe_push (first_element);
+           visited[i] = true;
+--
+2.33.0
+
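Taken together, 0295-0297 incrementally tighten a single merge site in vect_slp_grouped_load_find. A condensed recap of the final guard, with the GCC internals reduced to plain parameters so the accumulated logic is visible in one place (this is a restatement of the three diffs, not new behavior):

    #include <stdbool.h>

    /* a and b are the two grouped-load leaders being considered.  */
    static bool
    can_merge_groups (const void *base_a, const void *base_b,
                      unsigned size_a, unsigned size_b,
                      long diff,              /* DR_INIT(a) - DR_INIT(b), 0295 */
                      bool same_bb,           /* check_same_bb, 0296 */
                      bool distinct_ptr_info) /* DR_PTR_INFO differs, 0297 */
    {
      return base_a == base_b
             && size_a == size_b
             && diff >= 0
             && same_bb
             && distinct_ptr_info;
    }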
View file
_service:tar_scm:0298-Mark-prefetch-builtin-as-willreturn.patch
Added
@@ -0,0 +1,99 @@
+From a252bbd11d22481a1e719ed36d800e2192abb369 Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 15:49:27 +0800
+Subject: [PATCH 1/6] Mark prefetch builtin as willreturn
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/common.opt | 4 ++++
+ gcc/gimple.cc | 30 ++++++++++++++++++++++++++++++
+ gcc/gimple.h | 1 +
+ gcc/tree-ssa-pre.cc | 4 +---
+ 4 files changed, 36 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 688d65e4d..be5fcc681 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1313,6 +1313,10 @@ fdelete-null-pointer-checks
+ Common Var(flag_delete_null_pointer_checks) Init(-1) Optimization
+ Delete useless null pointer checks.
+
++fbuiltin-will-return
++Common Var(flag_builtin_will_return) Optimization
++Consider some of the builtins as definitely returning.
++
+ fdevirtualize-at-ltrans
+ Common Var(flag_ltrans_devirtualize)
+ Stream extra data to support more aggressive devirtualization in LTO local transformation mode.
+diff --git a/gcc/gimple.cc b/gcc/gimple.cc
+index 9e62da426..04ca9f161 100644
+--- a/gcc/gimple.cc
++++ b/gcc/gimple.cc
+@@ -2998,6 +2998,36 @@ nonbarrier_call_p (gimple *call)
+   return false;
+ }
+
++static inline bool
++will_return_builtin_p (gimple *call)
++{
++  if (!flag_builtin_will_return)
++    return false;
++
++  if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
++    return false;
++
++  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
++    {
++    case BUILT_IN_PREFETCH:
++      return true;
++    default:
++      return false;
++    }
++}
++
++bool
++will_return_call_p (gimple *call, function *fun)
++{
++  int flags = gimple_call_flags (call);
++  if (!(flags & (ECF_CONST|ECF_PURE))
++      || (flags & ECF_LOOPING_CONST_OR_PURE)
++      || stmt_can_throw_external (fun, call))
++    return will_return_builtin_p (call);
++
++  return true;
++}
++
+ /* Callback for walk_stmt_load_store_ops.
+
+    Return TRUE if OP will dereference the tree stored in DATA, FALSE
+diff --git a/gcc/gimple.h b/gcc/gimple.h
+index 77a5a07e9..bb05a7664 100644
+--- a/gcc/gimple.h
++++ b/gcc/gimple.h
+@@ -1628,6 +1628,7 @@ extern bool gimple_asm_clobbers_memory_p (const gasm *);
+ extern void dump_decl_set (FILE *, bitmap);
+ extern bool nonfreeing_call_p (gimple *);
+ extern bool nonbarrier_call_p (gimple *);
++extern bool will_return_call_p (gimple *, function *);
+ extern bool infer_nonnull_range (gimple *, tree);
+ extern bool infer_nonnull_range_by_dereference (gimple *, tree);
+ extern bool infer_nonnull_range_by_attribute (gimple *, tree);
+diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
+index 98134b5d3..b5264133a 100644
+--- a/gcc/tree-ssa-pre.cc
++++ b/gcc/tree-ssa-pre.cc
+@@ -3988,9 +3988,7 @@ compute_avail (function *fun)
+                  that forbids hoisting possibly trapping expressions
+                  before it.  */
+               int flags = gimple_call_flags (stmt);
+-              if (!(flags & (ECF_CONST|ECF_PURE))
+-                  || (flags & ECF_LOOPING_CONST_OR_PURE)
+-                  || stmt_can_throw_external (fun, stmt))
++              if (!will_return_call_p (stmt, fun))
+                 /* Defer setting of BB_MAY_NOTRETURN to avoid it
+                    influencing the processing of the call itself.  */
+                 set_bb_may_notreturn = true;
+--
+2.33.0
+
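The effect is easiest to see on a loop where a prefetch call sits next to otherwise hoistable code. A sketch under the flag added above (the flag spelling comes from the common.opt hunk; the loop itself is made up):

    /* Without -fbuiltin-will-return, PRE must assume the call might not
       return and is more conservative inside the loop; with it,
       BUILT_IN_PREFETCH is treated as definitely returning.
       Compile: gcc -O2 -fbuiltin-will-return test.c  */
    void
    accumulate (int *a, const int *b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          __builtin_prefetch (&b[i + 16]);
          *a += b[i];
        }
    }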
View file
_service:tar_scm:0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch
Added
@@ -0,0 +1,156 @@
+From 3b109376d057342a31267ea4c9bd422d940874cb Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Thu, 31 Oct 2024 16:09:43 +0800
+Subject: [PATCH 2/6] [Backport]Disallow pointer operands for |,^ and partly
+ & [PR106878]
+
+Signed-off-by: Jakub Jelinek <jakub@redhat.com>
+---
+ gcc/match.pd | 6 ++++-
+ .../gcc.c-torture/compile/pr106878.c | 15 +++++++++++++
+ gcc/tree-cfg.cc | 22 ++++++++++++++++---
+ gcc/tree-ssa-reassoc.cc | 16 +++++++++++++-
+ 4 files changed, 54 insertions(+), 5 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106878.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 8f41c292f..822e065e8 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -1655,6 +1655,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+            && (int_fits_type_p (@1, TREE_TYPE (@0))
+                || tree_nop_conversion_p (TREE_TYPE (@0), type)))
+           || types_match (@0, @1))
++      && !POINTER_TYPE_P (TREE_TYPE (@0))
++      && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE
+       /* ??? This transform conflicts with fold-const.cc doing
+          Convert (T)(x & c) into (T)x & (T)c, if c is an integer
+          constants (if x has signed type, the sign bit cannot be set
+@@ -1691,7 +1693,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+   (if (GIMPLE
+        && TREE_CODE (@1) != INTEGER_CST
+        && tree_nop_conversion_p (type, TREE_TYPE (@2))
+-       && types_match (type, @0))
++       && types_match (type, @0)
++       && !POINTER_TYPE_P (TREE_TYPE (@0))
++       && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE)
+    (bitop @0 (convert @1)))))
+
+ (for bitop (bit_and bit_ior)
+diff --git a/gcc/testsuite/gcc.c-torture/compile/pr106878.c b/gcc/testsuite/gcc.c-torture/compile/pr106878.c
+new file mode 100644
+index 000000000..c84571894
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/compile/pr106878.c
+@@ -0,0 +1,15 @@
++/* PR tree-optimization/106878 */
++
++typedef __INTPTR_TYPE__ intptr_t;
++typedef __UINTPTR_TYPE__ uintptr_t;
++int a;
++
++int
++foo (const int *c)
++{
++  uintptr_t d = ((intptr_t) c | (intptr_t) &a) & 65535 << 16;
++  intptr_t e = (intptr_t) c;
++  if (d != (e & 65535 << 16))
++    return 1;
++  return 0;
++}
+diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
+index 48b52f785..d33aaec8c 100644
+--- a/gcc/tree-cfg.cc
++++ b/gcc/tree-cfg.cc
+@@ -4163,7 +4163,9 @@ verify_gimple_assign_binary (gassign *stmt)
+     case ROUND_MOD_EXPR:
+     case RDIV_EXPR:
+     case EXACT_DIV_EXPR:
+-      /* Disallow pointer and offset types for many of the binary gimple. */
++    case BIT_IOR_EXPR:
++    case BIT_XOR_EXPR:
++      /* Disallow pointer and offset types for many of the binary gimple. */
+       if (POINTER_TYPE_P (lhs_type)
+           || TREE_CODE (lhs_type) == OFFSET_TYPE)
+         {
+@@ -4178,9 +4180,23 @@ verify_gimple_assign_binary (gassign *stmt)
+
+     case MIN_EXPR:
+     case MAX_EXPR:
+-    case BIT_IOR_EXPR:
+-    case BIT_XOR_EXPR:
++      /* Continue with generic binary expression handling.  */
++      break;
++
+     case BIT_AND_EXPR:
++      if (POINTER_TYPE_P (lhs_type)
++          && TREE_CODE (rhs2) == INTEGER_CST)
++        break;
++      /* Disallow pointer and offset types for many of the binary gimple. */
++      if (POINTER_TYPE_P (lhs_type)
++          || TREE_CODE (lhs_type) == OFFSET_TYPE)
++        {
++          error ("invalid types for %qs", code_name);
++          debug_generic_expr (lhs_type);
++          debug_generic_expr (rhs1_type);
++          debug_generic_expr (rhs2_type);
++          return true;
++        }
+       /* Continue with generic binary expression handling.  */
+       break;
+
+diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
+index e3d521e32..6baef4764 100644
+--- a/gcc/tree-ssa-reassoc.cc
++++ b/gcc/tree-ssa-reassoc.cc
+@@ -3617,10 +3617,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+       tree type2 = NULL_TREE;
+       bool strict_overflow_p = false;
+       candidates.truncate (0);
++      if (POINTER_TYPE_P (type1))
++        type1 = pointer_sized_int_node;
+       for (j = i; j; j = chains[j - 1])
+         {
+           tree type = TREE_TYPE (ranges[j - 1].exp);
+           strict_overflow_p |= ranges[j - 1].strict_overflow_p;
++          if (POINTER_TYPE_P (type))
++            type = pointer_sized_int_node;
+           if ((b % 4) == 3)
+             {
+               /* For the signed < 0 cases, the types should be
+@@ -3651,6 +3655,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+           tree type = TREE_TYPE (ranges[j - 1].exp);
+           if (j == k)
+             continue;
++          if (POINTER_TYPE_P (type))
++            type = pointer_sized_int_node;
+           if ((b % 4) == 3)
+             {
+               if (!useless_type_conversion_p (type1, type))
+@@ -3680,7 +3686,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+               op = r->exp;
+               continue;
+             }
+-          if (id == l)
++          if (id == l || POINTER_TYPE_P (TREE_TYPE (op)))
+             {
+               code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
+               g = gimple_build_assign (make_ssa_name (type1), code, op);
+@@ -3704,6 +3710,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+               gimple_seq_add_stmt_without_update (&seq, g);
+               op = gimple_assign_lhs (g);
+             }
++          type1 = TREE_TYPE (ranges[k - 1].exp);
++          if (POINTER_TYPE_P (type1))
++            {
++              gimple *g
++                = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
++              gimple_seq_add_stmt_without_update (&seq, g);
++              op = gimple_assign_lhs (g);
++            }
+           candidates.pop ();
+           if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
+                                  candidates.length (), opcode, ops, op,
+--
+2.33.0
+
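For source code the practical idiom is unchanged: do the bitwise arithmetic on integers, as pr106878.c above already does. What the stricter verifier rejects is GIMPLE that applies | or ^ directly to pointer or offset types, while BIT_AND_EXPR keeps a narrow exception for the pointer-with-constant-mask form. A small hedged illustration (my example, not from the patch):

    typedef __UINTPTR_TYPE__ uptr;

    /* Fine: |, ^ and & applied only after casting to an integer type.  */
    uptr
    mix (const int *p, const int *q)
    {
      return ((uptr) p | (uptr) q) ^ ((uptr) p & (uptr) q);
    }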
View file
_service:tar_scm:0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch
Added
@@ -0,0 +1,55 @@
+From 91ef8899a80e493042fd2687ad89064c9f90cf17 Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 16:14:34 +0800
+Subject: [PATCH 3/6] Remove erroneous pattern from gimple ifcvt
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/match.pd | 2 +-
+ gcc/testsuite/gcc.dg/ifcvt-gimple-1.c | 21 +++++++++++++++++++++
+ 2 files changed, 22 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 8f41c292f..2dd6581d1 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4276,7 +4276,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+
+ (if (flag_if_conversion_gimple)
+- (for simple_op (plus minus bit_and bit_ior bit_xor)
++ (for simple_op (plus minus bit_ior bit_xor)
+   (simplify
+    (cond @0 (simple_op @1 INTEGER_CST@2) @1)
+    (switch
+diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+new file mode 100644
+index 000000000..381a4ad51
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+@@ -0,0 +1,21 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -fno-inline -fif-conversion-gimple" } */
++
++#include <stdlib.h>
++
++void foo(int a, int *p) {
++  *p = a;
++}
++
++void verify (int a) {
++  if (a != 3)
++    abort ();
++}
++
++int main() {
++  int a = 0;
++  foo (3, &a);
++  int tmp = (a > 7) ? a & 1 : a;
++  verify (tmp);
++  return 0;
++}
+--
+2.33.0
+
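One plausible way to read the removed pattern's flaw: `cond ? a OP C : a` can be rewritten branchlessly as `a OP (cond ? C : 0)` only when 0 is the identity element of OP, which holds for plus, minus, bit_ior and bit_xor but not for bit_and (whose identity is all-ones). The retained testcase encodes exactly that: with a = 3, `(a > 7) ? a & 1 : a` must stay 3. A tiny standalone check of the same arithmetic (my sketch, not part of the patch):

    #include <assert.h>

    int
    main (void)
    {
      int a = 3, cond = 0;
      int want = cond ? (a & 1) : a;   /* 3 */
      int bad  = a & (cond ? 1 : 0);   /* 0: the bit_and rewrite is wrong */
      assert (want == 3 && bad == 0);
      return 0;
    }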
View file
_service:tar_scm:0301-Add-required-check-for-iteration-through-uses.patch
Added
@@ -0,0 +1,33 @@
+From ca24d352e98e357f4f7b8f0d262201765705a08a Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 16:31:33 +0800
+Subject: [PATCH 4/6] Add required check for iteration through uses
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/tree-ssa-math-opts.cc | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
+index 2c06b8a60..80c06fa01 100644
+--- a/gcc/tree-ssa-math-opts.cc
++++ b/gcc/tree-ssa-math-opts.cc
+@@ -4938,8 +4938,13 @@ convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt)
+
+   /* Find the mult low part getter.  */
+   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3])
+-    if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
+-      break;
++    {
++      if (!is_gimple_assign (use_stmt))
++        continue;
++
++      if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
++        break;
++    }
+
+   /* Create high and low (if needed) parts extractors.  */
+   /* Low part.  */
+--
+2.33.0
+
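FOR_EACH_IMM_USE_STMT visits every statement that uses an SSA name, and those uses need not be assignments: they can be calls, PHIs or returns, on which gimple_assign_rhs_code must not be asked for an rhs code. A hypothetical C shape that gives the double-size multiply a non-assignment immediate use (the testcase is made up; only convert_double_size_mul is from the patch):

    extern void sink (unsigned __int128);

    unsigned long
    mul_hi (unsigned long x, unsigned long y)
    {
      unsigned __int128 p = (unsigned __int128) x * y;
      sink (p);                        /* immediate use that is a call */
      return (unsigned long) (p >> 64);
    }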
View file
_service:tar_scm:0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch
Added
@@ -0,0 +1,158 @@
+From 210147e28d542a03588ba3c3fa473301a03bb687 Mon Sep 17 00:00:00 2001
+From: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com>
+Date: Thu, 31 Oct 2024 16:45:15 +0800
+Subject: [PATCH 6/6] Added param for optimization for merging bb's with cheap
+ insns. Zero param means turned off optimization (default implementation),
+ one means turned on
+
+Signed-off-by: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com>
+---
+ gcc/params.opt | 4 +++
+ gcc/testsuite/gcc.dg/if_comb1.c | 13 +++++++++
+ gcc/testsuite/gcc.dg/if_comb2.c | 13 +++++++++
+ gcc/testsuite/gcc.dg/if_comb3.c | 12 +++++++++
+ gcc/tree-ssa-ifcombine.cc | 47 ++++++++++++++++++++++++++++++---
+ 5 files changed, 86 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb1.c
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb2.c
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb3.c
+
+diff --git a/gcc/params.opt b/gcc/params.opt
+index fc700ab79..3ddfaf5b2 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -789,6 +789,10 @@ Maximum number of VALUEs handled during a single find_base_term call.
+ Common Joined UInteger Var(param_max_vrp_switch_assertions) Init(10) Param Optimization
+ Maximum number of assertions to add along the default edge of a switch statement during VRP.
+
++-param=merge-assign-stmts-ifcombine=
++Common Joined UInteger Var(param_merge_assign_stmts_ifcombine) Init(0) IntegerRange(0, 1) Param Optimization
++Whether bb's with cheap gimple_assign stmts should be merged in the ifcombine pass.
++
+ -param=min-crossjump-insns=
+ Common Joined UInteger Var(param_min_crossjump_insns) Init(5) IntegerRange(1, 65536) Param Optimization
+ The minimum number of matching instructions to consider for crossjumping.
+diff --git a/gcc/testsuite/gcc.dg/if_comb1.c b/gcc/testsuite/gcc.dg/if_comb1.c
+new file mode 100644
+index 000000000..e00adc37d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb1.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (double a, double b, int c)
++{
++  if (c < 10 || a - b > 1.0)
++    return 0;
++  else
++    return 1;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/testsuite/gcc.dg/if_comb2.c b/gcc/testsuite/gcc.dg/if_comb2.c
+new file mode 100644
+index 000000000..176e7e726
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb2.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (int a, int b, int c)
++{
++  if (a > 1 || b * c < 10)
++    return 0;
++  else
++    return 1;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/testsuite/gcc.dg/if_comb3.c b/gcc/testsuite/gcc.dg/if_comb3.c
+new file mode 100644
+index 000000000..aa2e4510c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb3.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (int a, int b, int c)
++{
++  if (a > 1 && b + c < 10)
++    a++;
++  return a;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
+index ce9bbebf9..264a8bcae 100644
+--- a/gcc/tree-ssa-ifcombine.cc
++++ b/gcc/tree-ssa-ifcombine.cc
+@@ -110,6 +110,18 @@ recognize_if_then_else (basic_block cond_bb,
+   return true;
+ }
+
++/* Verify if gimple insn cheap for param=merge-assign-stmts-ifcombine
++   optimization.  */
++
++bool is_insn_cheap (enum tree_code t)
++{
++  static enum tree_code cheap_insns[] = {MULT_EXPR, PLUS_EXPR, MINUS_EXPR};
++  for (int i = 0; i < sizeof (cheap_insns)/sizeof (enum tree_code); i++)
++    if (t == cheap_insns[i])
++      return 1;
++  return 0;
++}
++
+ /* Verify if the basic block BB does not have side-effects.  Return
+    true in this case, else false.  */
+
+@@ -572,9 +584,38 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
+         = param_logical_op_non_short_circuit;
+       if (!logical_op_non_short_circuit || sanitize_coverage_p ())
+         return false;
+-      /* Only do this optimization if the inner bb contains only the conditional. */
+-      if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
+-        return false;
++      if (param_merge_assign_stmts_ifcombine)
++        {
++          int number_cheap_insns = 0;
++          int number_conds = 0;
++          for (auto i = gsi_start_nondebug_after_labels_bb
++               (outer_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i))
++            if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN
++                && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i))))
++              number_cheap_insns++;
++            else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND)
++              number_conds++;
++          for (auto i = gsi_start_nondebug_after_labels_bb
++               (inner_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i))
++            if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN
++                && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i))))
++              number_cheap_insns++;
++            else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND)
++              number_conds++;
++          if (!(number_cheap_insns == 1 && number_conds == 2)
++              && !gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb
++                                        (inner_cond_bb)))
++            return false;
++        }
++      else
++        {
++          /* Only do this optimization if the inner bb contains
++             only the conditional.  */
++          if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb
++                                     (inner_cond_bb)))
++            return false;
++        }
++
+       t1 = fold_build2_loc (gimple_location (inner_cond),
+                             inner_cond_code,
+                             boolean_type_node,
+--
+2.33.0
+
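Usage follows the included testcases: the param (default 0, i.e. prior behavior) lets ifcombine merge blocks that contain, besides the two conditions, exactly one cheap assignment (a multiply, plus or minus). For example:

    /* Compile with:
       gcc -O2 --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine t.c
       The b * c statement no longer blocks combining the two conditions.  */
    int
    foo (int a, int b, int c)
    {
      return (a > 1 || b * c < 10) ? 0 : 1;
    }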
View file
_service:tar_scm:0303-Add-generation-of-stream-in-functions-for-pre-versio.patch
Added
@@ -0,0 +1,6263 @@
+From 4789a6eae616df0b7d07901114c91a2099e4d56d Mon Sep 17 00:00:00 2001
+From: wangchunyang <wangchunyang15@huawei.com>
+Date: Wed, 13 Nov 2024 11:26:16 +0800
+Subject: [PATCH 1/2] Add generation of stream in functions for pre-version lto
+ objects
+
+---
+ gcc/lto-streamer.h | 6 +
+ gcc/opt-read.awk | 1 +
+ gcc/optc-save-gen.awk | 6044 ++++++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 6050 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
+index 597e9e405..9db1a20b3 100644
+--- a/gcc/lto-streamer.h
++++ b/gcc/lto-streamer.h
+@@ -943,12 +943,18 @@ void cl_target_option_stream_in (class data_in *,
+                                  struct bitpack_d *,
+                                  struct cl_target_option *);
+
++void cl_target_option_stream_in_prev (class data_in *,
++                                      struct bitpack_d *,
++                                      struct cl_target_option *);
++
+ void cl_optimization_stream_out (struct output_block *,
+                                  struct bitpack_d *, struct cl_optimization *);
+
+ void cl_optimization_stream_in (class data_in *,
+                                 struct bitpack_d *, struct cl_optimization *);
+
++void cl_optimization_stream_in_prev (class data_in *,
++                                     struct bitpack_d *, struct cl_optimization *);
+
+
+ /* In lto-opts.cc.  */
+diff --git a/gcc/opt-read.awk b/gcc/opt-read.awk
+index ce3617c8d..624cf6e3d 100644
+--- a/gcc/opt-read.awk
++++ b/gcc/opt-read.awk
+@@ -71,6 +71,7 @@ BEGIN {
+                 n_target_save++
+
+                 extra_target_vars[n_extra_target_vars] = name
++                extra_target_vars_set[name] = 1
+                 extra_target_var_types[n_extra_target_vars] = type
+                 n_extra_target_vars++
+         }
+diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
+index 76e9b3cb9..7c012dd4e 100644
+--- a/gcc/optc-save-gen.awk
++++ b/gcc/optc-save-gen.awk
+@@ -174,6 +174,8 @@ print "  unsigned HOST_WIDE_INT mask = 0;";
+ j = 0;
+ k = 0;
+ for (i = 0; i < n_opt_other; i++) {
++        var_opt_other_j[var_opt_other[i]] = j;
++        var_opt_other_k[var_opt_other[i]] = k;
+         print "  if (opts_set->x_" var_opt_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -185,6 +187,8 @@ for (i = 0; i < n_opt_other; i++) {
+ }
+
+ for (i = 0; i < n_opt_int; i++) {
++        var_opt_int_j[var_opt_int[i]] = j;
++        var_opt_int_k[var_opt_int[i]] = k;
+         print "  if (opts_set->x_" var_opt_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -196,6 +200,8 @@ for (i = 0; i < n_opt_int; i++) {
+ }
+
+ for (i = 0; i < n_opt_enum; i++) {
++        var_opt_enum_j[var_opt_enum[i]] = j;
++        var_opt_enum_k[var_opt_enum[i]] = k;
+         print "  if (opts_set->x_" var_opt_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -207,6 +213,8 @@ for (i = 0; i < n_opt_enum; i++) {
+ }
+
+ for (i = 0; i < n_opt_short; i++) {
++        var_opt_short_j[var_opt_short[i]] = j;
++        var_opt_short_k[var_opt_short[i]] = k;
+         print "  if (opts_set->x_" var_opt_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -218,6 +226,8 @@ for (i = 0; i < n_opt_short; i++) {
+ }
+
+ for (i = 0; i < n_opt_char; i++) {
++        var_opt_char_j[var_opt_char[i]] = j;
++        var_opt_char_k[var_opt_char[i]] = k;
+         print "  if (opts_set->x_" var_opt_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -229,6 +239,8 @@ for (i = 0; i < n_opt_char; i++) {
+ }
+
+ for (i = 0; i < n_opt_string; i++) {
++        var_opt_string_j[var_opt_string[i]] = j;
++        var_opt_string_k[var_opt_string[i]] = k;
+         print "  if (opts_set->x_" var_opt_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -604,6 +616,8 @@ for (i = 0; i < n_extra_target_vars; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        extra_target_vars_j[extra_target_vars[i]] = j;
++        extra_target_vars_k[extra_target_vars[i]] = k;
+         print "  if (opts_set->x_" extra_target_vars[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -622,6 +636,8 @@ for (i = 0; i < n_target_other; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_other_j[var_target_other[i]] = j;
++        var_target_other_k[var_target_other[i]] = k;
+         print "  if (opts_set->x_" var_target_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -636,6 +652,8 @@ for (i = 0; i < n_target_enum; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_enum_j[var_target_enum[i]] = j;
++        var_target_enum_k[var_target_enum[i]] = k;
+         print "  if (opts_set->x_" var_target_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -654,6 +672,8 @@ for (i = 0; i < n_target_int; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_int_j[var_target_int[i]] = j;
++        var_target_int_k[var_target_int[i]] = k;
+         print "  if (opts_set->x_" var_target_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -668,6 +688,8 @@ for (i = 0; i < n_target_short; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_short_j[var_target_short[i]] = j;
++        var_target_short_k[var_target_short[i]] = k;
+         print "  if (opts_set->x_" var_target_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -682,6 +704,8 @@ for (i = 0; i < n_target_char; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_char_j[var_target_char[i]] = j;
++        var_target_char_k[var_target_char[i]] = k;
+         print "  if (opts_set->x_" var_target_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -696,6 +720,8 @@ for (i = 0; i < n_target_string; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_string_j[var_target_string[i]] = j;
++        var_target_string_k[var_target_string[i]] = k;
+         print "  if (opts_set->x_" var_target_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -1038,6 +1064,7 @@ for (i = 0; i < n_target_save; i++) {
+         sub(" *" name "$", "", type)
+         if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$") {
+                 var_target_str[n_target_str++] = name;
++                var_target_str_set[name] = 1;
+                 string_options_names[name]++
+         }
+         else {
+@@ -1048,12 +1075,14 @@ for (i = 0; i < n_target_save; i++) {
+                 sub("\\[.+", "", name)
+                 sub(" [^ ]+$", "", type)
+                 var_target_array[n_target_array] = name
++                var_target_array_set[name] = 1
+                 var_target_array_type[n_target_array] = type
+                 var_target_array_size[n_target_array++] = size
+         }
+         else {
+                 var_target_val_type[n_target_val] = type;
+                 var_target_val[n_target_val++] = name;
++                var_target_val_set[name] = 1;
+         }
+ }
+ }
+@@ -1069,17 +1098,21 @@ if (have_save) {
+
+                 var_list_seen[name]++;
+                 otype = var_type_struct(flags[i])
+-                if (otype ~ "^const char \\**$")
++                if (otype ~ "^const char \\**$") {
+                         var_target_str[n_target_str++] = "x_" name;
++                        var_target_str_set["x_" name] = 1;
View file
_service:tar_scm:0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch
Added
@@ -0,0 +1,963 @@
+From f81a5b294711e3a420fe66702f0d9221332271c4 Mon Sep 17 00:00:00 2001
+From: h00564365 <huangxiaoquan1@huawei.com>
+Date: Wed, 13 Nov 2024 17:18:01 +0800
+Subject: [PATCH 2/2] Add multi-version lto symbol parse, cross lto units
+ ipa-inline extension, and lto compression algorithm specified.
+
+---
+ gcc/common.opt | 20 +++
+ gcc/config/aarch64/aarch64.cc | 41 ++++++
+ gcc/doc/tm.texi | 6 +
+ gcc/doc/tm.texi.in | 2 +
+ gcc/ipa-inline.cc | 141 ++++++++++++++++++-
+ gcc/lto-compress.cc | 6 +-
+ gcc/lto-section-in.cc | 5 +
+ gcc/lto-streamer-out.cc | 7 +-
+ gcc/lto-wrapper.cc | 4 +
+ gcc/optc-save-gen.awk | 57 ++++++++
+ gcc/opth-gen.awk | 3 +
+ gcc/opts.cc | 46 ++++++
+ gcc/target.def | 10 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c | 6 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c | 5 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c | 10 ++
+ gcc/tree-streamer-in.cc | 58 +++++++-
+ lto-plugin/lto-plugin.c | 83 +++++++++++
+ 21 files changed, 547 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index be5fcc681..78cfc333a 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1928,6 +1928,21 @@ finline-atomics
+ Common Var(flag_inline_atomics) Init(1) Optimization
+ Inline __atomic operations when a lock free instruction sequence is available.
+
++fmulti-version-lib=
++Common Joined Var(multi_version_lib_string)
++Use specify LTO stream in mode for specified target (object or lib). If there
++are multiple target files, use commas (,) to separate them and without spaces.
++
++finline-force
++Common Var(flag_inline_force) Init(0) Optimization
++Force perform ipa inline when march options are incompatible between functions.
++
++finline-force=
++Common Joined Var(force_inline_targets_string)
++Force perform ipa inline specified target(object or lib) when march options are
++incompatible between functions. If there are multiple target files, use commas
++(,) to separate them and without spaces.
++
+ fcf-protection
+ Common RejectNegative Alias(fcf-protection=,full)
+
+@@ -2168,6 +2183,11 @@ flto-partition=
+ Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
+ Specify the algorithm to partition symbols and vars at linktime.
+
++flto-compression-algorithm=
++Common Joined Var(lto_compression_algorithm)
++-flto-compression-algorithm=<format>	Generate lto compression in zlib/zstd
++format <format>.
++
+ ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
+ flto-compression-level=
+ Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 19)
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 025a3c478..f095f17aa 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -20829,6 +20829,44 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
+            arch->name, extension.c_str ());
+ }
+
++/* Implement TARGET_OPTION_PRINT_DIFF.  */
++
++static void
++aarch64_option_print_diff (FILE *file, int indent,
++                           struct cl_target_option *ptr1,
++                           struct cl_target_option *ptr2)
++{
++  const char *const cpu1
++    = aarch64_get_tune_cpu (ptr1->x_selected_tune)->name;
++  const struct processor *arch1 = aarch64_get_arch (ptr1->x_selected_arch);
++  std::string extension1
++    = aarch64_get_extension_string_for_isa_flags (ptr1->x_aarch64_isa_flags,
++                                                  arch1->flags);
++
++  const char *const cpu2
++    = aarch64_get_tune_cpu (ptr2->x_selected_tune)->name;
++  const struct processor *arch2 = aarch64_get_arch (ptr2->x_selected_arch);
++  std::string extension2
++    = aarch64_get_extension_string_for_isa_flags (ptr2->x_aarch64_isa_flags,
++                                                  arch2->flags);
++
++  if (cpu1 != cpu2 && (!cpu1 || !cpu2 || strcmp (cpu1, cpu2)))
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "cpu", cpu1 ? cpu1 : "(null)", cpu2 ? cpu2 : "(null)");
++
++  if (arch1->name != arch2->name
++      && (!arch1->name || !arch2->name || strcmp (arch1->name, arch2->name)))
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "arch", arch1->name ? arch1->name : "(null)",
++             arch2->name ? arch2->name : "(null)");
++
++  if (extension1 != extension2)
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "extension",
++             extension1.empty () ? "(null)" : extension1.c_str (),
++             extension2.empty () ? "(null)" : extension2.c_str ());
++}
++
+ static GTY(()) tree aarch64_previous_fndecl;
+
+ void
+@@ -31161,6 +31199,9 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_OPTION_PRINT
+ #define TARGET_OPTION_PRINT aarch64_option_print
+
++#undef TARGET_OPTION_PRINT_DIFF
++#define TARGET_OPTION_PRINT_DIFF aarch64_option_print_diff
++
+ #undef TARGET_OPTION_VALID_ATTRIBUTE_P
+ #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 1e96521e6..50bbbbc42 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10589,6 +10589,12 @@ information in the @code{struct cl_target_option} structure for
+ function-specific options.
+ @end deftypefn
+
++@deftypefn {Target Hook} void TARGET_OPTION_PRINT_DIFF (FILE *@var{file}, int @var{indent}, struct cl_target_option *@var{ptr1}, struct cl_target_option *@var{ptr2})
++This hook is called to print diff additional target-specific
++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for
++function-specific options.
++@end deftypefn
++
+ @deftypefn {Target Hook} bool TARGET_OPTION_PRAGMA_PARSE (tree @var{args}, tree @var{pop_target})
+ This target hook parses the options for @code{#pragma GCC target}, which
+ sets the target-specific options for functions that occur later in the
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index 2dd515659..cfda60304 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6985,6 +6985,8 @@ on this implementation detail.
+
+ @hook TARGET_OPTION_PRINT
+
++@hook TARGET_OPTION_PRINT_DIFF
++
+ @hook TARGET_OPTION_PRAGMA_PARSE
+
+ @hook TARGET_OPTION_OVERRIDE
+diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
+index f8bb072c4..8d5cc9a84 100644
+--- a/gcc/ipa-inline.cc
++++ b/gcc/ipa-inline.cc
+@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3.  If not see
+    the need for offline copy of the function.  */
+
+ #include "config.h"
++#define INCLUDE_SET
++#define INCLUDE_STRING
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -127,6 +129,7 @@ typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
+ static int overall_size;
+ static profile_count max_count;
+ static profile_count spec_rem;
++static std::set<std::string> force_inline_targets;
+
+ /* Return false when inlining edge E would lead to violating
+    limits on function unit growth or stack usage growth.
+@@ -222,6 +225,38 @@ caller_growth_limits (struct cgraph_edge *e)
+   return true;
+ }
+
++/* Warn and prompt the user, and output only once for the file pair where
++   the function is located.  */
++
++static void
++prompt_inline_failed_target_option_reason (struct cgraph_edge *e)
++{
++  static std::set<std::pair<void*, void*>> address_pair_set;
++  if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH
++      && !cl_target_option_eq_major (target_opts_for_fn (e->caller->decl),
++         target_opts_for_fn (e->callee->ultimate_alias_target ()->decl))
++      && e->caller->lto_file_data
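A guess at the intended workflow, pieced together from the option texts above; only the option spellings come from the common.opt hunk, the file names and command lines are hypothetical:

    /* main.c -- to be linked against an LTO object (hot.o) produced by a
       different gcc version or with different -march options.
       Possible build, assuming the options behave as documented above:
         gcc -O2 -flto -flto-compression-algorithm=zstd -c main.c
         gcc -O2 -flto -fmulti-version-lib=hot.o -finline-force=hot.o \
             main.o hot.o -o app                                        */
    extern int hot (int x);   /* defined in the foreign LTO object */

    int
    main (void)
    {
      return hot (42);
    }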
View file
_service:tar_scm:0305-Backport-varasm-Handle-private-COMDAT-function-symbo.patch
Added
@@ -0,0 +1,296 @@
+From bbb4954294d010977fcfb96931384101cf015a44 Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Mon, 26 Feb 2024 17:55:07 +0100
+Subject: [PATCH] [Backport]varasm: Handle private COMDAT function symbol
+ reference in readonly data section [PR113617]
+
+If default_elf_select_rtx_section is called to put a reference to some
+local symbol defined in a comdat section into memory, which happens more often
+since the r14-4944 RA change, linking might fail.
+default_elf_select_rtx_section puts such constants into .data.rel.ro.local
+etc. sections and if linker chooses comdat sections from some other TU
+and discards the one to which a relocation in .data.rel.ro.local remains,
+linker diagnoses error.  References to private comdat symbols can only appear
+from functions or data objects in the same comdat group, so the following
+patch arranges using .data.rel.ro.local.pool.<comdat_name> and similar sections.
+
+2024-02-26  Jakub Jelinek  <jakub@redhat.com>
+            H.J. Lu  <hjl.tools@gmail.com>
+
+        PR rtl-optimization/113617
+        * varasm.cc (default_elf_select_rtx_section): For
+        references to private symbols in comdat sections
+        use .data.relro.local.pool.<comdat>, .data.relro.pool.<comdat>
+        or .rodata.<comdat> comdat sections.
+
+        * g++.dg/other/pr113617.C: New test.
+        * g++.dg/other/pr113617.h: New test.
+        * g++.dg/other/pr113617-aux.cc: New test.
+---
+ gcc/testsuite/g++.dg/other/pr113617-aux.cc | 9 ++
+ gcc/testsuite/g++.dg/other/pr113617.C | 27 +++++
+ gcc/testsuite/g++.dg/other/pr113617.h | 132 +++++++++++++++++++++
+ gcc/varasm.cc | 48 +++++++-
+ 4 files changed, 215 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617-aux.cc
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.C
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.h
+
+diff --git a/gcc/testsuite/g++.dg/other/pr113617-aux.cc b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+new file mode 100644
+index 000000000..e6900e05a
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+@@ -0,0 +1,9 @@
++// PR rtl-optimization/113617
++// { dg-do link { target { c++17 && c++14_down } } }
++
++#include "pr113617.h"
++
++void qux() {
++  A<long long> a;
++  a.foo(0, 0);
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.C b/gcc/testsuite/g++.dg/other/pr113617.C
+new file mode 100644
+index 000000000..a02dda142
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.C
+@@ -0,0 +1,27 @@
++// PR rtl-optimization/113617
++// { dg-do link { target c++11 } }
++// { dg-options "-O2" }
++// { dg-additional-options "-fPIC" { target fpic } } */
++// { dg-additional-options "-shared" { target shared } } */
++// { dg-additional-sources pr113617-aux.cc }
++
++#include "pr113617.h"
++
++int z;
++long xx1;
++void corge() {
++  A<long long> a;
++  a.foo(xx1, 0);
++}
++
++typedef unsigned long int VV __attribute__((vector_size (2 * sizeof (long))));
++VV vv;
++__attribute__((noipa)) static void fn1 (void) {}
++__attribute__((noipa)) static void fn2 (void) {}
++
++void
++fn3 ()
++{
++  VV a = { (unsigned long) &fn1, (unsigned long) &fn2 };
++  vv = a;
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.h b/gcc/testsuite/g++.dg/other/pr113617.h
+new file mode 100644
+index 000000000..4d30eddbc
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.h
+@@ -0,0 +1,132 @@
++namespace {
++template <int V> struct J { static constexpr int value = V; };
++template <bool V> using K = J<V>;
++using M = K<true>;
++template <int> struct L { template <typename _Tp, typename> using type = _Tp; };
++template <bool _Cond, typename _If, typename _Else> using N = typename L<_Cond>::type<_If, _Else>;
++M k;
++template <typename _Tp> struct O { using type = _Tp; };
++template <typename _Up>
++struct P : N<M::value, O<_Up>, _Up> {};
++template <typename _Tp> struct Q { using type = typename P<_Tp>::type; };
++}
++namespace R {
++struct H;
++enum G {};
++template <typename> class S;
++struct T { using U = bool (*) (H &, const H &, G); U F; };
++template <typename, typename> class B;
++template <typename _R, typename _F, typename... _A>
++struct B<_R(_A...), _F> {
++  static bool F(H &, const H &, G) { return false; }
++  __attribute__((noipa)) static _R bar(const H &) {}
++};
++template <typename _R, typename... _A>
++struct S<_R(_A...)> : T {
++  template <typename _F> using AH = B<_R(), _F>;
++  template <typename _F> S(_F) {
++    using AG = AH<_F>;
++    barr = AG::bar;
++    F = AG::F;
++  }
++  using AF = _R (*)(const H &);
++  AF barr;
++};
++template <typename> class I;
++template <typename _F, typename... _B>
++struct I<_F(_B...)> {};
++template <typename> using W = decltype(k);
++template <int, typename _F, typename... _B> struct V {
++  typedef I<typename Q<_F>::type(typename Q<_B>::type...)> type;
++};
++template <typename _F, typename... _B>
++__attribute__((noipa)) typename V<W<_F>::value, _F, _B...>::type
++baz(_F, _B...) { return typename V<W<_F>::value, _F, _B...>::type (); }
++template <typename _Tp> struct AJ {
++  template <typename _Up> struct _Ptr { using type = _Up *; };
++  using AI = typename _Ptr<_Tp>::type;
++};
++template <typename _Tp> struct Y {
++  using AI = typename AJ<_Tp>::AI;
++  AI operator->();
++};
++}
++extern int z;
++namespace N1 {
++namespace N2 {
++namespace N3 {
++enum Z { Z1, Z2 };
++template <int> struct X {
++  template <typename _F>
++  __attribute__((noipa)) void boo(long long, long long, long long, _F &) {}
++};
++struct AC {
++  AC(int);
++  void m1(R::S<void()>);
++};
++template <typename>
++__attribute__((noipa)) void garply(void *, long long, long long, long long) {}
++template <>
++template <typename _F>
++void X<Z2>::boo(long long, long long x, long long y, _F &fi) {
++  AC pool(z);
++  for (;;) {
++    auto job = R::baz(garply<_F>, &fi, y, y, x);
++    pool.m1(job);
++  }
++}
++struct AB {
++  static AB &bleh();
++  template <typename _F>
++  void boo(long first, long x, long y, _F fi) {
++    switch (ab1) {
++    case Z1:
++      ab2->boo(first, x, y, fi);
++    case Z2:
++      ab3->boo(first, x, y, fi);
++    }
++  }
++  Z ab1;
++  R::Y<X<Z1>> ab2;
++  R::Y<X<Z2>> ab3;
++};
++template <typename, bool> struct C;
++template <typename _F> struct C<_F, false> {
++  __attribute__((noipa)) C(_F) {}
++  void boo(long first, long x, long y) {
++    auto u = AB::bleh();
++    u.boo(first, x, y, *this);
++  }
++};
++template <typename _F> struct AA { typedef C<_F, 0> type; };
++}
++}
++}
++struct AD {
++  template <typename _F>
++  static void boo(long first, long x, long y, _F f) {
View file
_service:tar_scm:0306-RISC-V-Install-libstdc-libcc1-etc-to-lib64-instead-o.patch
Added
@@ -0,0 +1,65 @@
+From 84edbc6544ed872aedb3cb6f6d0feb8647ff1d8b Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Mon, 14 Oct 2024 10:09:46 +0800
+Subject: [PATCH] RISC-V: Install libstdc++/libcc1 etc to /lib64 instead of lib
+
+The problem is that if we are configured with `--disable-multilib`,
+  gcc -print-multi-os-directory
+outputs
+  .
+Thus the dest to install libraries is set to
+  /usr/lib/.
+While on other platforms (x86-64, arm64) it will be
+  /usr/lib/../lib64
+Let's sync riscv64 with them.
+
+Another problem is that
+  gcc -print-file-name=libzstd.so.1
+will output
+  /usr/lib64/lp64d/../lib64/libzstd.so.1
+which also needs to be patched.
+---
+ gcc/config.gcc | 3 +++
+ gcc/config/riscv/linux.h | 2 ++
+ gcc/config/riscv/t-openEuler | 2 ++
+ 3 files changed, 7 insertions(+)
+ create mode 100644 gcc/config/riscv/t-openEuler
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 19b21a280..23c5bee2b 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2453,6 +2453,9 @@ riscv*-*-linux*)
+ 	xyes) tmake_file="${tmake_file} riscv/t-linux-multilib" ;;
+ 	*) echo "Unknown value for enable_multilib"; exit 1
+ 	esac
++	case "x${target_vendor}" in
++	xopenEuler) tmake_file="${tmake_file} riscv/t-openEuler"
++	esac
+ 	tmake_file="${tmake_file} riscv/t-riscv riscv/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
+index b5c6c5027..a8d65f4e0 100644
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+ #define STARTFILE_PREFIX_SPEC \
++  "/lib" XLEN_SPEC "/ " \
++  "/usr/lib" XLEN_SPEC "/ " \
+   "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+   "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+   "/lib/ " \
+diff --git a/gcc/config/riscv/t-openEuler b/gcc/config/riscv/t-openEuler
+new file mode 100644
+index 000000000..26541dd08
+--- /dev/null
++++ b/gcc/config/riscv/t-openEuler
+@@ -0,0 +1,2 @@
++MULTILIB_OPTIONS = mabi=lp64d
++MULTILIB_DIRNAMES = ../lib64
+--
+2.39.5 (Apple Git-154)
+
View file
_service:tar_scm:0307-Set-fallback-value-for-print-multi-os-directory.patch
Added
@@ -0,0 +1,105 @@
+From 0d157b14f361f8319f4694c54c6e01ac8f59d278 Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Tue, 8 Oct 2024 17:56:23 +0800
+Subject: [PATCH 1/2] Set fallback value for -print-multi-os-directory
+
+Clang doesn't support the -print-multi-os-directory option,
+so let's set the fallback value (../lib64) if it is empty.
+
+This is only needed for the projects built by hostcc:
+  gcc, libcc1, libiberty
+
+The projects for targets only will always be built by gcc itself.
+---
+ gcc/configure | 3 +++
+ libcc1/configure | 6 ++++++
+ libcc1/configure.ac | 3 +++
+ libiberty/Makefile.in | 5 ++++-
+ libtool.m4 | 3 +++
+ 5 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index 7e64599b0..ef0449edd 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -18598,6 +18598,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+diff --git a/libcc1/configure b/libcc1/configure
+index 01cfb2806..3c437d690 100755
+--- a/libcc1/configure
++++ b/libcc1/configure
+@@ -9701,6 +9701,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+@@ -14865,6 +14868,9 @@ libsuffix=
+ if test "$GXX" = yes; then
+   libsuffix=`$CXX -print-multi-os-directory`
+ fi
++if [ -z "$libsuffix" ];then
++  libsuffix=../lib64
++fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for socket libraries" >&5
+diff --git a/libcc1/configure.ac b/libcc1/configure.ac
+index 36f5a7e09..acd7c4c04 100644
+--- a/libcc1/configure.ac
++++ b/libcc1/configure.ac
+@@ -72,6 +72,9 @@ libsuffix=
+ if test "$GXX" = yes; then
+   libsuffix=`$CXX -print-multi-os-directory`
+ fi
++if [ -z "$libsuffix" ];then
++  libsuffix=../lib64
++fi
+ AC_SUBST(libsuffix)
+
+ dnl Test for -lsocket and -lnsl.  Copied from libgo/configure.ac.
+diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
+index 1b17c2e3a..2bfa00de5 100644
+--- a/libiberty/Makefile.in
++++ b/libiberty/Makefile.in
+@@ -385,7 +385,10 @@ install-strip: install
+ # multilib-specific flags, it's overridden by FLAGS_TO_PASS from the
+ # default multilib, so we have to take CFLAGS into account as well,
+ # since it will be passed the multilib flags.
+-MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory`
++MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory 2>/dev/null`
++ifeq ($(MULTIOSDIR),)
++  MULTIOSDIR = ../lib64
++endif
+ install_to_libdir: all
+ 	if test -n "${target_header_dir}"; then \
+ 		${mkinstalldirs} $(DESTDIR)$(libdir)/$(MULTIOSDIR); \
+diff --git a/libtool.m4 b/libtool.m4
+index 17f8e5f30..86fc1e705 100644
+--- a/libtool.m4
++++ b/libtool.m4
+@@ -2059,6 +2059,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+--
+2.47.0
+
View file
_service:tar_scm:0308-Fix-enum-INPUT-MIDDLE-FINAL-aes_stage.patch
Added
@@ -0,0 +1,108 @@
+From 1624bdceb341e0034c22ce46bc2e422726f76cce Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Tue, 8 Oct 2024 17:59:56 +0800
+Subject: [PATCH 2/2] Fix enum { INPUT, MIDDLE, FINAL } aes_stage
+
+FINAL is defined in ansidecl.h.
+Let's rename the elements to
+  aesINPUT, aesMIDDLE, aesFINAL
+to avoid conflicts.
+
+I found this problem when trying to build gcc with clang.
+In fact FINAL is defined to empty for clang, and `final` for gcc.
+So it coincidentally worked for gcc.
+---
+ gcc/crypto-accel.cc | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
+index e7766a585..716c4a38b 100644
+--- a/gcc/crypto-accel.cc
++++ b/gcc/crypto-accel.cc
+@@ -1251,7 +1251,7 @@ public:
+
+ /* AES stage description.  Required for some specializations
+    for curtain rounds.  */
+-typedef enum { INPUT, MIDDLE, FINAL } aes_stage;
++typedef enum { aesINPUT, aesMIDDLE, aesFINAL } aes_stage;
+
+ /* AES entity description.  It can be both round or state inside round.
+    It provides interface for unified analysis between blocks of 4 parts:
+@@ -1356,7 +1356,7 @@ struct state_input
+
+ /* Input round state uses special input.  */
+ template<>
+-struct state_input<INPUT>
++struct state_input<aesINPUT>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+
+@@ -1389,7 +1389,7 @@ struct state_output
+
+ /* Final round state generates special output.  */
+ template<>
+-struct state_output<FINAL>
++struct state_output<aesFINAL>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+
+@@ -1409,7 +1409,7 @@ struct round_input
+
+ /* Input round uses special input just as its state.  */
+ template<>
+-struct round_input<INPUT>
++struct round_input<aesINPUT>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+ };
+@@ -1437,7 +1437,7 @@ struct round_output
+    AES encryption.  */
+ template<>
+ template<>
+-void round_output<INPUT>::reorder<aes_decrypt_table> (type &out)
++void round_output<aesINPUT>::reorder<aes_decrypt_table> (type &out)
+ {
+   gcc_assert (out.size () == 4);
+   std::swap (out[1], out[3]);
+@@ -1445,14 +1445,14 @@ void round_output<INPUT>::reorder<aes_decrypt_table> (type &out)
+
+ template<>
+ template<>
+-void round_output<MIDDLE>::reorder<aes_decrypt_table> (type &out)
++void round_output<aesMIDDLE>::reorder<aes_decrypt_table> (type &out)
+ {
+-  round_output<INPUT>::reorder<aes_decrypt_table> (out);
++  round_output<aesINPUT>::reorder<aes_decrypt_table> (out);
+ }
+
+ /* Final round generates special output.  */
+ template<>
+-struct round_output<FINAL> : state_output<FINAL>
++struct round_output<aesFINAL> : state_output<aesFINAL>
+ {
+   template<typename T>
+   static void finalize (type &out, const T &v)
+@@ -1644,14 +1644,14 @@ public:
+   typedef std::map<rtx_insn *, aes_table_ref<T> > table_ref_map;
+
+   /* AES states typedefs.  */
+-  typedef aes_state<input_info, INPUT, T> aes_input_state;
+-  typedef aes_state<round_input_info<T>, MIDDLE, T> aes_body_state;
+-  typedef aes_state<round_input_info<T>, FINAL, T> aes_final_state;
++  typedef aes_state<input_info, aesINPUT, T> aes_input_state;
++  typedef aes_state<round_input_info<T>, aesMIDDLE, T> aes_body_state;
++  typedef aes_state<round_input_info<T>, aesFINAL, T> aes_final_state;
+
+   /* AES rounds typedefs.  */
+-  typedef aes_round<input_info, INPUT, T> aes_input_round;
+-  typedef aes_round<round_input_info<T>, MIDDLE, T> aes_body_round;
+-  typedef aes_round<round_input_info<T>, FINAL, T> aes_final_round;
++  typedef aes_round<input_info, aesINPUT, T> aes_input_round;
++  typedef aes_round<round_input_info<T>, aesMIDDLE, T> aes_body_round;
++  typedef aes_round<round_input_info<T>, aesFINAL, T> aes_final_round;
+
+   bool run ();
+
+--
+2.47.0
+
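A minimal reproduction of the name clash being fixed (my own sketch; ansidecl.h is the real header, the enum line mirrors the old crypto-accel.cc code):

    /* ansidecl.h defines FINAL as a macro: empty for compilers without
       C++11 `final` support (the clang configuration hit here), `final`
       otherwise.  */
    #define FINAL
    typedef enum { INPUT, MIDDLE, FINAL } aes_stage;
    /* Under clang this preprocesses to
       "typedef enum { INPUT, MIDDLE, } aes_stage;" -- it still parses,
       but the FINAL enumerator silently disappears and every later use
       of FINAL fails.  Under gcc, FINAL expands to `final`, which only
       coincidentally kept working.  Renaming the enumerators to
       aesINPUT/aesMIDDLE/aesFINAL sidesteps the macro entirely.  */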
View file
_service:tar_scm:Fix-indentation-and-numbering-errors.diff
Added
@@ -0,0 +1,205 @@
+diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in
+index 91cd653623b..b686f5eb492 100644
+--- a/libphobos/libdruntime/Makefile.in
++++ b/libphobos/libdruntime/Makefile.in
+@@ -124,13 +124,13 @@ target_triplet = @target@
+ # CPU specific sources
+ @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S
+ @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S
+-@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S
+-@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S
+-@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S
++@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S
++@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S
++@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_19 = config/loongarch/switchcontext.S
+ subdir = libdruntime
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+@@ -475,14 +475,14 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \
+ @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22)
+ @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo
+ @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo
+-@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_32 = config/loongarch/libgdruntime_la-switchcontext.lo
+-am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \
++am__objects_33 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \
+ 	$(am__objects_11) $(am__objects_13) $(am__objects_15) \
+ 	$(am__objects_17) $(am__objects_19) $(am__objects_21) \
+ 	$(am__objects_23) $(am__objects_24) $(am__objects_25) \
+@@ -500,22 +500,22 @@ am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo
+ @DRUNTIME_OS_MINGW_TRUE@	config/mingw/libgdruntime_convenience_la-msvc.lo
+ @DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo
+ @DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo
+-@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_MIPS_TRUE@am__objects_40 = config/mips/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_41 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_42 = config/mingw/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_43 = config/x86/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_44 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_S390_TRUE@am__objects_45 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_46 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo
+ am__objects_47 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \
+ 	$(am__objects_11) $(am__objects_13) $(am__objects_15) \
+-	$(am__objects_17) $(am__objects_19) $(am__objects_36) \
+-	$(am__objects_23) $(am__objects_37) $(am__objects_38) \
+-	$(am__objects_39) $(am__objects_40) $(am__objects_41) \
+-	$(am__objects_42) $(am__objects_43) $(am__objects_44) \
+-	$(am__objects_45) $(am__objects_46)
+-am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \
+-	$(am__objects_47) $(am__objects_33)
++	$(am__objects_17) $(am__objects_19) $(am__objects_37) \
++	$(am__objects_23) $(am__objects_38) $(am__objects_39) \
++	$(am__objects_40) $(am__objects_41) $(am__objects_42) \
++	$(am__objects_43) $(am__objects_44) $(am__objects_45) \
++	$(am__objects_46)
++am__objects_48 = $(am__objects_1) $(am__objects_36) $(am__objects_3) \
++	$(am__objects_47) $(am__objects_34)
+ am__objects_49 = $(am__objects_48)
+ am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49)
+ libgdruntime_convenience_la_OBJECTS = \
+@@ -1905,11 +1905,6 @@ config/arm/$(am__dirstamp):
+ 	@: > config/arm/$(am__dirstamp)
+ config/arm/libgdruntime_la-switchcontext.lo: \
+ 	config/arm/$(am__dirstamp)
+-config/loongarch/$(am__dirstamp):
+-	@$(MKDIR_P) config/loongarch
+-	@: > config/loongarch/$(am__dirstamp)
+-config/loongarch/libgdruntime_la-switchcontext.lo: \
+-	config/loongarch/$(am__dirstamp)
+ config/mips/$(am__dirstamp):
+ 	@$(MKDIR_P) config/mips
+ 	@: > config/mips/$(am__dirstamp)
+@@ -1937,6 +1932,11 @@ config/s390/$(am__dirstamp):
+ 	@: > config/s390/$(am__dirstamp)
+ config/s390/libgdruntime_la-get_tls_offset.lo: \
+ 	config/s390/$(am__dirstamp)
++config/loongarch/$(am__dirstamp):
++	@$(MKDIR_P) config/loongarch
++	@: > config/loongarch/$(am__dirstamp)
++config/loongarch/libgdruntime_la-switchcontext.lo: \
++	config/loongarch/$(am__dirstamp)
+ gcc/config.lo: gcc/$(am__dirstamp)
+ gcc/libbacktrace.lo: gcc/$(am__dirstamp)
+
+@@ -1950,8 +1950,6 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/aarch64/$(am__dirstamp)
+ config/arm/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/arm/$(am__dirstamp)
+-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \
+-	config/loongarch/$(am__dirstamp)
+ config/mips/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/mips/$(am__dirstamp)
+ config/powerpc/libgdruntime_convenience_la-switchcontext.lo: \
+@@ -1964,6 +1962,8 @@ config/systemz/libgdruntime_convenience_la-get_tls_offset.lo: \
+ 	config/systemz/$(am__dirstamp)
+ config/s390/libgdruntime_convenience_la-get_tls_offset.lo: \
+ 	config/s390/$(am__dirstamp)
++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \
++	config/loongarch/$(am__dirstamp)
+
+ libgdruntime_convenience.la: $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_DEPENDENCIES) $(EXTRA_libgdruntime_convenience_la_DEPENDENCIES)
+ 	$(AM_V_GEN)$(libgdruntime_convenience_la_LINK) $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_LIBADD) $(LIBS)
+@@ -1976,14 +1976,14 @@ mostlyclean-compile:
+ 	-rm -f config/arm/*.lo
+ 	-rm -f config/mingw/*.$(OBJEXT)
+ 	-rm -f config/mingw/*.lo
+-	-rm -f config/loongarch/*.$(OBJEXT)
+-	-rm -f config/loongarch/*.lo
+ 	-rm -f config/mips/*.$(OBJEXT)
+ 	-rm -f config/mips/*.lo
+ 	-rm -f config/powerpc/*.$(OBJEXT)
+ 	-rm -f config/powerpc/*.lo
+ 	-rm -f config/s390/*.$(OBJEXT)
+ 	-rm -f config/s390/*.lo
++	-rm -f config/loongarch/*.$(OBJEXT)
++	-rm -f config/loongarch/*.lo
+ 	-rm -f config/systemz/*.$(OBJEXT)
+ 	-rm -f config/systemz/*.lo
+ 	-rm -f config/x86/*.$(OBJEXT)
+@@ -2101,10 +2101,7 @@ config/aarch64/libgdruntime_la-switchcontext.lo: config/aarch64/switchcontext.S
+ config/arm/libgdruntime_la-switchcontext.lo: config/arm/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S
+
+-config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S
+-	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS)
+-
+-onfig/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S
++config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S
+
+ config/powerpc/libgdruntime_la-switchcontext.lo: config/powerpc/switchcontext.S
+@@ -2122,18 +2119,21 @@ config/systemz/libgdruntime_la-get_tls_offset.lo: config/systemz/get_tls_offset.
+ config/s390/libgdruntime_la-get_tls_offset.lo: config/s390/get_tls_offset.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/s390/libgdruntime_la-get_tls_offset.lo `test -f 'config/s390/get_tls_offset.S' || echo '$(srcdir)/'`config/s390/get_tls_offset.S
+
++config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S
++	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S
++
+ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: config/aarch64/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/aarch64/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/aarch64/switchcontext.S' || echo '$(srcdir)/'`config/aarch64/switchcontext.S
+
+ config/arm/libgdruntime_convenience_la-switchcontext.lo: config/arm/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S
+
+-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S
+-	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM
+-
+ config/mips/libgdruntime_convenience_la-switchcontext.lo: config/mips/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S
+
++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S
++	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S
++
+ config/powerpc/libgdruntime_convenience_la-switchcontext.lo: config/powerpc/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/powerpc/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/powerpc/switchcontext.S' || echo
'$(srcdir)/'`config/powerpc/switchcontext.S + +@@ -2178,10 +2178,10 @@ clean-libtool: + -rm -rf config/aarch64/.libs config/aarch64/_libs + -rm -rf config/arm/.libs config/arm/_libs + -rm -rf config/mingw/.libs config/mingw/_libs +- -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/mips/.libs config/mips/_libs + -rm -rf config/powerpc/.libs config/powerpc/_libs + -rm -rf config/s390/.libs config/s390/_libs ++ -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/systemz/.libs config/systemz/_libs + -rm -rf config/x86/.libs config/x86/_libs + -rm -rf core/.libs core/_libs +@@ -2340,10 +2340,10 @@ distclean-generic: + -rm -f config/aarch64/$(am__dirstamp) + -rm -f config/arm/$(am__dirstamp) + -rm -f config/mingw/$(am__dirstamp) +- -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/mips/$(am__dirstamp)
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2