openEuler:24.03:SP1:Everything / gcc

We truncated the diff of some files because they were too big.
Changes of Revision 7
View file
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 32
+%global gcc_release 45
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
 
@@ -69,12 +69,8 @@
 %global multilib_32_arch i686
 %endif
 %ifarch riscv64
-%global _lib lib
 %global _smp_mflags -j8
 %endif
-%ifarch loongarch64
-%global _lib lib
-%endif
 
 %global isl_enable 0
 %global check_enable 0
@@ -206,27 +202,218 @@
 Patch94: 0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch
 Patch95: 0095-STABS-remove-gstabs-and-gxcoff-functionality.patch
 Patch96: 0096-Bugfix-Autofdo-use-PMU-sampling-set-num-eauals-den.patch
-Patch97: 0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
-Patch98: 0098-aarch64-Avoid-a-use-of-callee-offset.patch
-Patch99: 0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
-Patch100: 0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
-Patch101: 0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
-Patch102: 0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
-Patch103: 0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
-Patch104: 0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
-Patch105: 0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
-Patch106: 0106-aarch64-Tweak-frame-size-comment.patch
-Patch107: 0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
-Patch108: 0108-aarch64-Simplify-top-of-frame-allocation.patch
-Patch109: 0109-aarch64-Minor-initial-adjustment-tweak.patch
-Patch110: 0110-aarch64-Tweak-stack-clash-boundary-condition.patch
-Patch111: 0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
-Patch112: 0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
-Patch113: 0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
-Patch114: 0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
-Patch115: 0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
-Patch116: 0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
-Patch117: 0117-aarch64-Fix-loose-ldpstp-check.patch
+Patch97: 0097-Improve-non-loop-disambiguation.patch
+Patch98: 0098-CHREC-multiplication-and-undefined-overflow.patch
+Patch99: 0099-Enable-Transposed-SLP.patch
+Patch100: 0100-Add-hip09-machine-discribtion.patch
+Patch101: 0101-Add-hip11-CPU-pipeline-scheduling.patch
+Patch102: 0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
+Patch103: 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
+Patch104: 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
+Patch105: 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
+Patch106: 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
+Patch107: 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
+Patch108: 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
+Patch109: 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
+Patch110: 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
+Patch111: 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
+Patch112: 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
+Patch113: 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
+Patch114: 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
+Patch115: 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
+Patch116: 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
+Patch117: 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
+Patch118: 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
+Patch119: 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
+Patch120: 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
+Patch121: 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
+Patch122: 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
+Patch123: 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
+Patch124: 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
+Patch125: 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
+Patch126: 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
+Patch127: 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
+Patch128: 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
+Patch129: 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
+Patch130: 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
+Patch131: 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
+Patch132: 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
+Patch133: 0133-Backport-SME-mode-switching-Add-note-problem.patch
+Patch134: 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
+Patch135: 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
+Patch136: 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
+Patch137: 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
+Patch138: 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
+Patch139: 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
+Patch140: 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
+Patch141: 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
+Patch142: 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
+Patch143: 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
+Patch144: 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
+Patch145: 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
+Patch146: 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
+Patch147: 0147-Backport-SME-function-Change-return-type-of-predicat.patch
+Patch148: 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
+Patch149: 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
+Patch150: 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
+Patch151: 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
+Patch152: 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
+Patch153: 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
+Patch154: 0154-Backport-SME-recog-Support-space-in-cons.patch
+Patch155: 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
+Patch156: 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
+Patch157: 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
+Patch158: 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
+Patch159: 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
+Patch160: 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
+Patch161: 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
+Patch162: 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
+Patch163: 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
+Patch164: 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
+Patch165: 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
+Patch166: 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
+Patch167: 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
+Patch168: 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
+Patch169: 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
+Patch170: 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
+Patch171: 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
+Patch172: 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
+Patch173: 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
+Patch174: 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
+Patch175: 0175-Backport-SME-aarch64-Add-sme.patch
+Patch176: 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
+Patch177: 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
+Patch178: 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
+Patch179: 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
+Patch180: 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
+Patch181: 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
+Patch182: 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
+Patch183: 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
+Patch184: 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
+Patch185: 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
+Patch186: 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
+Patch187: 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
+Patch188: 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
+Patch189: 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
+Patch190: 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
+Patch191: 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
+Patch192: 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
+Patch193: 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
+Patch194: 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
+Patch195: 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
+Patch196: 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
+Patch197: 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
+Patch198: 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
+Patch199: 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
+Patch200: 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
+Patch201: 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
+Patch202: 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
+Patch203: 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
+Patch204: 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
+Patch205: 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
+Patch206: 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
+Patch207: 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
+Patch208: 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
+Patch209: 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
+Patch210: 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
+Patch211: 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
+Patch212: 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
+Patch213: 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
+Patch214: 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
+Patch215: 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
+Patch216: 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
+Patch217: 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
+Patch218: 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
+Patch219: 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
+Patch220: 0220-Backport-SME-libgcc-Fix-config.in.patch
+Patch221: 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
+Patch222: 0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
+Patch223: 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
+Patch224: 0224-Backport-SME-aarch64-Add-V1DI-mode.patch
+Patch225: 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
+Patch226: 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
+Patch227: 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
+Patch228: 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
+Patch229: 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
+Patch230: 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
+Patch231: 0231-SME-Add-missing-header-file-in-aarch64.cc.patch
+Patch232: 0232-Backport-SME-c-Add-support-for-__extension__.patch
+Patch233: 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
+Patch234: 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
+Patch235: 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
+Patch236: 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
+Patch237: 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
+Patch238: 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
+Patch239: 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
+Patch240: 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
+Patch241: 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
+Patch242: 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
+Patch243: 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
+Patch244: 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
+Patch245: 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
+Patch246: 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
+Patch247: 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
+Patch248: 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
+Patch249: 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
View file
_service:tar_scm:0001-LoongArch-Reimplement-multilib-build-option-handling.patch
Added
@@ -0,0 +1,464 @@
+From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 13 Sep 2023 17:52:14 +0800
+Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option
+ handling.
+
+Library build options from --with-multilib-list used to be processed with
+*self_spec, which missed the driver's initial canonicalization.  This
+caused limitations on CFLAGS override and the use of driver-only options
+like -mno-lsx.
+
+The problem is solved by promoting the injection rules of --with-multilib-list
+options to the first element of DRIVER_SELF_SPECS, to make them execute before
+the canonialization.  The library-build options are also hard-coded in
+the driver and can be used conveniently by the builders of other non-gcc
+libraries via the use of -fmultiflags.
+
+Bootstrapped and tested on loongarch64-linux-gnu.
+
+ChangeLog:
+
+    * config-ml.in: Remove unneeded loongarch clause.
+    * configure.ac: Register custom makefile fragments mt-loongarch-*
+    for loongarch targets.
+    * configure: Regenerate.
+
+config/ChangeLog:
+
+    * mt-loongarch-mlib: New file.  Pass -fmultiflags when building
+    target libraries (FLAGS_FOR_TARGET).
+    * mt-loongarch-elf: New file.
+    * mt-loongarch-gnu: New file.
+
+gcc/ChangeLog:
+
+    * config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG.
+    * config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS
+    before the driver canonicalization routines.
+    * config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc.
+    to loongarch-driver.h
+    * config/loongarch/t-linux: Move multilib-related definitions to
+    t-multilib.
+    * config/loongarch/t-multilib: New file.  Inject library build
+    options obtained from --with-multilib-list.
+    * config/loongarch/t-loongarch: Same.
+---
+ config-ml.in                            | 10 ----
+ config/mt-loongarch-elf                 |  1 +
+ config/mt-loongarch-gnu                 |  2 +
+ config/mt-loongarch-mlib                |  1 +
+ configure                               |  6 +++
+ configure.ac                            |  6 +++
+ gcc/config.gcc                          |  6 +--
+ gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++
+ gcc/config/loongarch/loongarch.h        | 50 ------------------
+ gcc/config/loongarch/t-linux            | 66 +++---------------------
+ gcc/config/loongarch/t-loongarch        |  2 +-
+ gcc/config/loongarch/t-multilib         | 68 +++++++++++++++++++++++++
+ 12 files changed, 137 insertions(+), 123 deletions(-)
+ create mode 100644 config/mt-loongarch-elf
+ create mode 100644 config/mt-loongarch-gnu
+ create mode 100644 config/mt-loongarch-mlib
+ create mode 100644 gcc/config/loongarch/t-multilib
+
+diff --git a/config-ml.in b/config-ml.in
+index ad0db0781..68854a4f1 100644
+--- a/config-ml.in
++++ b/config-ml.in
+@@ -301,16 +301,6 @@ arm-*-*)
+ 	  done
+ 	fi
+ 	;;
+-loongarch*-*)
+-	old_multidirs="${multidirs}"
+-	multidirs=""
+-	for x in ${old_multidirs}; do
+-	  case "$x" in
+-	  `${CC-gcc} --print-multi-directory`) : ;;
+-	  *) multidirs="${multidirs} ${x}" ;;
+-	  esac
+-	done
+-	;;
+ m68*-*-*)
+ 	if [ x$enable_softfloat = xno ]
+ 	then
+diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf
+new file mode 100644
+index 000000000..bbf29bb57
+--- /dev/null
++++ b/config/mt-loongarch-elf
+@@ -0,0 +1 @@
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu
+new file mode 100644
+index 000000000..dfefb44ed
+--- /dev/null
++++ b/config/mt-loongarch-gnu
+@@ -0,0 +1,2 @@
++include $(srcdir)/config/mt-gnu
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib
+new file mode 100644
+index 000000000..4cfe568f1
+--- /dev/null
++++ b/config/mt-loongarch-mlib
+@@ -0,0 +1 @@
++FLAGS_FOR_TARGET += -fmultiflags
+diff --git a/configure b/configure
+index aff62c464..81b4a3cec 100755
+--- a/configure
++++ b/configure
+@@ -9548,6 +9548,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/configure.ac b/configure.ac
+index f310d75ca..9f8dbd319 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -2729,6 +2729,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3f870e966..e34a5fbb9 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2510,7 +2510,7 @@ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
+ 	extra_options="${extra_options} linux-android.opt"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -2522,7 +2522,7 @@ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -5241,7 +5241,7 @@ case "${target}" in
+ 		loongarch_multilib_list_sane=no
+
+ 		# This one goes to TM_MULTILIB_CONFIG, for use in t-linux.
+-		loongarch_multilib_list_make=""
++		loongarch_multilib_list_make="${abi_base},"
+
+ 		# This one goes to tm_defines, for use in loongarch-driver.c.
+ 		loongarch_multilib_list_c=""
+diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
+index 6cfe0efb5..e7d083677 100644
+--- a/gcc/config/loongarch/loongarch-driver.h
++++ b/gcc/config/loongarch/loongarch-driver.h
+@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3.  If not see
+
+ #include "loongarch-str.h"
+
++#ifndef SUBTARGET_CPP_SPEC
++#define SUBTARGET_CPP_SPEC ""
++#endif
++
++#ifndef SUBTARGET_CC1_SPEC
++#define SUBTARGET_CC1_SPEC ""
++#endif
++
++#ifndef SUBTARGET_ASM_SPEC
++#define SUBTARGET_ASM_SPEC ""
++#endif
++
++#define EXTRA_SPECS \
++  {"early_self_spec", ""}, \
++  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
++  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
++  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
++
++
View file
_service:tar_scm:0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
Added
@@ -0,0 +1,192 @@
+From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 15 Sep 2023 10:22:49 +0800
+Subject: [PATCH 002/188] LoongArch: Check whether binutils supports the relax
+ function.  If supported, explicit relocs are turned off by default.
+
+gcc/ChangeLog:
+
+    * config.in: Regenerate.
+    * config/loongarch/genopts/loongarch.opt.in: Add compilation option
+    mrelax. And set the initial value of explicit-relocs according to the
+    detection status.
+    * config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
+    --no-relax option to the linker.
+    * config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
+    -mno-relax, pass the -mno-relax option to the assembler.
+    * config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro.
+    * config/loongarch/loongarch.opt: Regenerate.
+    * configure: Regenerate.
+    * configure.ac: Add detection of support for binutils relax function.
+---
+ gcc/config.in                                 |  6 ++++
+ gcc/config/loongarch/genopts/loongarch.opt.in |  7 ++++-
+ gcc/config/loongarch/gnu-user.h               |  3 +-
+ gcc/config/loongarch/loongarch-driver.h       |  2 +-
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++
+ gcc/config/loongarch/loongarch.opt            |  7 ++++-
+ gcc/configure                                 | 31 +++++++++++++++++++
+ gcc/configure.ac                              |  4 +++
+ 8 files changed, 60 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 0dff36199..0c55e67e7 100644
+--- a/gcc/config.in
++++ b/gcc/config.in
+@@ -637,6 +637,12 @@
+ #endif
+
+
++/* Define if your assembler supports -mrelax option. */
++#ifndef USED_FOR_TARGET
++#undef HAVE_AS_MRELAX_OPTION
++#endif
++
++
+ /* Define if your assembler supports .mspabi_attribute. */
+ #ifndef USED_FOR_TARGET
+ #undef HAVE_AS_MSPABI_ATTRIBUTE
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index 2ef1b1e3b..f18733c24 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
+ -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
+
+ mexplicit-relocs
+-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
+ Use %reloc() assembly operators.
+
+ ; The code model option names for -mcmodel.
+@@ -214,3 +214,8 @@ Specify the code model.
+ mdirect-extern-access
+ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
+ Avoid using the GOT to access external symbols.
++
++mrelax
++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
++Take advantage of linker relaxations to reduce the number of instructions
++required to materialize symbol addresses.
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 44e4f2575..60ef75601 100644
+--- a/gcc/config/loongarch/gnu-user.h
++++ b/gcc/config/loongarch/gnu-user.h
+@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3.  If not see
+   "%{!shared: %{static} "                                       \
+   "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} "      \
+   "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} "              \
+-  "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
++  "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \
++  "%{mno-relax: --no-relax}"
+
+
+ /* Similar to standard Linux, but adding -ffast-math support. */
+diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
+index e7d083677..59fa3263d 100644
+--- a/gcc/config/loongarch/loongarch-driver.h
++++ b/gcc/config/loongarch/loongarch-driver.h
+@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3.  If not see
+
+ #undef ASM_SPEC
+ #define ASM_SPEC \
+-  "%{mabi=*} %(subtarget_asm_spec)"
++  "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
+
+
+ extern const char*
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index 624e246bb..f2b59abe6 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+ #define HAVE_AS_EXPLICIT_RELOCS 0
+ #endif
+
++#ifndef HAVE_AS_MRELAX_OPTION
++#define HAVE_AS_MRELAX_OPTION 0
++#endif
++
+ #endif /* LOONGARCH_OPTS_H */
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index f2d21c9f3..78f2baf3a 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
+ -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
+
+ mexplicit-relocs
+-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
+ Use %reloc() assembly operators.
+
+ ; The code model option names for -mcmodel.
+@@ -221,3 +221,8 @@ Specify the code model.
+ mdirect-extern-access
+ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
+ Avoid using the GOT to access external symbols.
++
++mrelax
++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
++Take advantage of linker relaxations to reduce the number of instructions
++required to materialize symbol addresses.
+diff --git a/gcc/configure b/gcc/configure
+index 2a5d3aaf3..8ae8a924a 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
+
+ $as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
+
++fi
++
++  { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5
++$as_echo_n "checking assembler for -mrelax option... " >&6; }
++if ${gcc_cv_as_loongarch_relax+:} false; then :
++  $as_echo_n "(cached) " >&6
++else
++  gcc_cv_as_loongarch_relax=no
++  if test x$gcc_cv_as != x; then
++    $as_echo '.text' > conftest.s
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++	gcc_cv_as_loongarch_relax=yes
++    else
++      echo "configure: failed program was" >&5
++      cat conftest.s >&5
++    fi
++    rm -f conftest.o conftest.s
++  fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5
++$as_echo "$gcc_cv_as_loongarch_relax" >&6; }
++if test $gcc_cv_as_loongarch_relax = yes; then
++
++$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
++
+ fi
+
+ ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index ba2bf1ffc..f7161e66e 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5322,6 +5322,10 @@ x:
+     .cfi_endproc],,
+     [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
+       [Define if your assembler supports eh_frame pcrel encoding.])])
++  gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax,
++    [-mrelax], [.text],,
++    [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
++      [Define if your assembler supports -mrelax option.])])
+   ;;
+ s390*-*-*)
+   gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
+--
+2.43.0
+
View file
_service:tar_scm:0003-Modify-gas-uleb128-support-test.patch
Added
@@ -0,0 +1,115 @@
+From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001
+From: mengqinggang <mengqinggang@loongson.cn>
+Date: Fri, 15 Sep 2023 12:04:04 +0800
+Subject: [PATCH 003/188] Modify gas uleb128 support test
+
+Some assemblers (GNU as for LoongArch) generates relocations for leb128
+symbol arithmetic for relaxation, we need to disable relaxation probing
+leb128 support then.
+
+gcc/ChangeLog:
+
+    * configure: Regenerate.
+    * configure.ac: Checking assembler for -mno-relax support.
+    Disable relaxation when probing leb128 support.
+
+co-authored-by: Xi Ruoyao <xry111@xry111.site>
+---
+ gcc/configure    | 42 +++++++++++++++++++++++++++++++++++++++++-
+ gcc/configure.ac | 17 ++++++++++++++++-
+ 2 files changed, 57 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index 8ae8a924a..430d44dc3 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -24441,6 +24441,46 @@ _ACEOF
+
+
+
++# Some assemblers (GNU as for LoongArch) generates relocations for
++# leb128 symbol arithmetic for relaxation, we need to disable relaxation
++# probing leb128 support then.
++case $target in
++  loongarch*-*-*)
++    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5
++$as_echo_n "checking assembler for -mno-relax support... " >&6; }
++if ${gcc_cv_as_mno_relax+:} false; then :
++  $as_echo_n "(cached) " >&6
++else
++  gcc_cv_as_mno_relax=no
++  if test x$gcc_cv_as != x; then
++    $as_echo '.text' > conftest.s
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++	gcc_cv_as_mno_relax=yes
++    else
++      echo "configure: failed program was" >&5
++      cat conftest.s >&5
++    fi
++    rm -f conftest.o conftest.s
++  fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5
++$as_echo "$gcc_cv_as_mno_relax" >&6; }
++if test $gcc_cv_as_mno_relax = yes; then
++  check_leb128_asflags=-mno-relax
++fi
++
++    ;;
++  *)
++    check_leb128_asflags=
++    ;;
++esac
++
+ # Check if we have .usleb128, and support symbol arithmetic with it.
+ # Older versions of GAS and some non-GNU assemblers, have a bugs handling
+ # these directives, even when they appear to accept them.
+@@ -24459,7 +24499,7 @@ L1:
+ L2:
+ 	.uleb128 0x8000000000000000
+ ' > conftest.s
+-  if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
++  if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index f7161e66e..4b24db190 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix)
+
+ gcc_AC_INITFINI_ARRAY
+
++# Some assemblers (GNU as for LoongArch) generates relocations for
++# leb128 symbol arithmetic for relaxation, we need to disable relaxation
++# probing leb128 support then.
++case $target in
++  loongarch*-*-*)
++    gcc_GAS_CHECK_FEATURE([-mno-relax support],
++      gcc_cv_as_mno_relax,[-mno-relax],[.text],,
++      [check_leb128_asflags=-mno-relax])
++    ;;
++  *)
++    check_leb128_asflags=
++    ;;
++esac
++
+ # Check if we have .usleb128, and support symbol arithmetic with it.
+ # Older versions of GAS and some non-GNU assemblers, have a bugs handling
+ # these directives, even when they appear to accept them.
+-gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,,
++gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,
++[$check_leb128_asflags],
+ [.data
+ 	.uleb128 L2 - L1
+ L1:
+--
+2.43.0
+
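For background on what this test probes (not part of the revision): ULEB128 stores an unsigned integer in base-128 groups of 7 bits, low-order group first, with the top bit of each byte set while more groups follow. A minimal C sketch of the encoding that .uleb128 emits (illustrative only; the function name is ours):

#include <stdint.h>
#include <stddef.h>

/* Encode VALUE as unsigned LEB128 into BUF; return the number of
   bytes written.  Each output byte carries 7 payload bits; bit 7 is
   a continuation flag.  */
static size_t
encode_uleb128 (uint64_t value, uint8_t *buf)
{
  size_t n = 0;
  do
    {
      uint8_t byte = value & 0x7f;
      value >>= 7;
      if (value != 0)
        byte |= 0x80;  /* more groups follow */
      buf[n++] = byte;
    }
  while (value != 0);
  return n;
}

The constant 0x8000000000000000 in the conftest above needs the maximum ten bytes (nine 0x80 bytes followed by 0x01), making it a good stress value, while the symbol arithmetic L2 - L1 forces the assembler to either resolve the difference or emit a relocation; GNU as for LoongArch emits relocations when relaxation is enabled, hence the -mno-relax probe.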
View file
_service:tar_scm:0004-LoongArch-Optimizations-of-vector-construction.patch
Added
@@ -0,0 +1,1310 @@
+From b74895b8b723a64bc136c4c560661abed81e013a Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Thu, 21 Sep 2023 09:19:18 +0800
+Subject: [PATCH 004/188] LoongArch: Optimizations of vector construction.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md (lasx_vecinit_merge_<LASX:mode>): New
+    pattern for vector construction.
+    (vec_set<mode>_internal): Ditto.
+    (lasx_xvinsgr2vr_<mode256_i_half>_internal): Ditto.
+    (lasx_xvilvl_<lasxfmt_f>_internal): Ditto.
+    * config/loongarch/loongarch.cc (loongarch_expand_vector_init):
+    Optimized the implementation of vector construction.
+    (loongarch_expand_vector_init_same): New function.
+    * config/loongarch/lsx.md (lsx_vilvl_<lsxfmt_f>_internal): New
+    pattern for vector construction.
+    (lsx_vreplvei_mirror_<lsxfmt_f>): New pattern for vector
+    construction.
+    (vec_concatv2df): Ditto.
+    (vec_concatv4sf): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c: New test.
+    * gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  |  69 ++
+ gcc/config/loongarch/loongarch.cc             | 716 +++++++++---------
+ gcc/config/loongarch/lsx.md                   | 134 ++++
+ .../vector/lasx/lasx-vec-construct-opt.c      | 102 +++
+ .../vector/lsx/lsx-vec-construct-opt.c        |  85 +++
+ 5 files changed, 732 insertions(+), 374 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 8111c8bb7..2bc5d47ed 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -186,6 +186,9 @@
+   UNSPEC_LASX_XVLDI
+   UNSPEC_LASX_XVLDX
+   UNSPEC_LASX_XVSTX
++  UNSPEC_LASX_VECINIT_MERGE
++  UNSPEC_LASX_VEC_SET_INTERNAL
++  UNSPEC_LASX_XVILVL_INTERNAL
+ )
+
+ ;; All vector modes with 256 bits.
+@@ -255,6 +258,15 @@
+    (V8SF "V4SF")
+    (V4DF "V2DF")])
+
++;; The attribute gives half int/float modes for vector modes.
++(define_mode_attr VHMODE256_ALL
++  [(V32QI "V16QI")
++   (V16HI "V8HI")
++   (V8SI "V4SI")
++   (V4DI "V2DI")
++   (V8SF "V4SF")
++   (V4DF "V2DF")])
++
+ ;; The attribute gives double modes for vector modes in LASX.
+ (define_mode_attr VDMODE256
+   [(V8SI "V4DI")
+@@ -312,6 +324,11 @@
+    (V4DI "v4df")
+    (V8SI "v8sf")])
+
++;; This attribute gives V32QI mode and V16HI mode with half size.
++(define_mode_attr mode256_i_half
++  [(V32QI "v16qi")
++   (V16HI "v8hi")])
++
+ ;; This attribute gives suffix for LASX instructions. HOW?
+ (define_mode_attr lasxfmt
+   [(V4DF "d")
+@@ -756,6 +773,20 @@
+   [(set_attr "type" "simd_splat")
+    (set_attr "mode" "<MODE>")])
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Support a LSX-mode input op[2].
++(define_insn "lasx_vecinit_merge_<LASX:mode>"
++  [(set (match_operand:LASX 0 "register_operand" "=f")
++	(unspec:LASX
++	  [(match_operand:LASX 1 "register_operand" "0")
++	   (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")
++	   (match_operand 3 "const_uimm8_operand")]
++	  UNSPEC_LASX_VECINIT_MERGE))]
++  "ISA_HAS_LASX"
++  "xvpermi.q\t%u0,%u2,%3"
++  [(set_attr "type" "simd_splat")
++   (set_attr "mode" "<MODE>")])
++
+ (define_insn "lasx_xvpickve2gr_d<u>"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+	(any_extend:DI
+@@ -779,6 +810,33 @@
+   DONE;
+ })
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Simulate missing instructions xvinsgr2vr.b and xvinsgr2vr.h.
++(define_expand "vec_set<mode>_internal"
++  [(match_operand:ILASX_HB 0 "register_operand")
++   (match_operand:<UNITMODE> 1 "reg_or_0_operand")
++   (match_operand 2 "const_<indeximm256>_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx index = GEN_INT (1 << INTVAL (operands[2]));
++  emit_insn (gen_lasx_xvinsgr2vr_<mode256_i_half>_internal
++	     (operands[0], operands[1], operands[0], index));
++  DONE;
++})
++
++(define_insn "lasx_xvinsgr2vr_<mode256_i_half>_internal"
++  [(set (match_operand:ILASX_HB 0 "register_operand" "=f")
++	(unspec:ILASX_HB [(match_operand:<UNITMODE> 1 "reg_or_0_operand" "rJ")
++			  (match_operand:ILASX_HB 2 "register_operand" "0")
++			  (match_operand 3 "const_<bitmask256>_operand" "")]
++			 UNSPEC_LASX_VEC_SET_INTERNAL))]
++  "ISA_HAS_LASX"
++{
++  return "vinsgr2vr.<lasxfmt>\t%w0,%z1,%y3";
++}
++  [(set_attr "type" "simd_insert")
++   (set_attr "mode" "<MODE>")])
++
+ (define_expand "vec_set<mode>"
+   [(match_operand:FLASX 0 "register_operand")
+    (match_operand:<UNITMODE> 1 "reg_or_0_operand")
+@@ -1567,6 +1625,17 @@
+   [(set_attr "type" "simd_flog2")
+    (set_attr "mode" "<MODE>")])
+
++;; Only for loongarch_expand_vector_init in loongarch.cc.
++;; Merge two scalar floating-point op[1] and op[2] into a LASX op[0].
++(define_insn "lasx_xvilvl_<lasxfmt_f>_internal"
++  [(set (match_operand:FLASX 0 "register_operand" "=f")
++	(unspec:FLASX [(match_operand:<UNITMODE> 1 "register_operand" "f")
++		       (match_operand:<UNITMODE> 2 "register_operand" "f")]
++		      UNSPEC_LASX_XVILVL_INTERNAL))]
++  "ISA_HAS_LASX"
++  "xvilvl.<lasxfmt>\t%u0,%u2,%u1"
++  [(set_attr "type" "simd_permute")
++   (set_attr "mode" "<MODE>")])
+
+ (define_insn "smax<mode>3"
+   [(set (match_operand:FLASX 0 "register_operand" "=f")
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index f2e796a6b..760b12268 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10193,300 +10193,344 @@ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ 				     ops[1])));
+ }
+
++/* Expand initialization of a vector which has all same elements.  */
++
+ void
+-loongarch_expand_vector_init (rtx target, rtx vals)
++loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
+ {
+   machine_mode vmode = GET_MODE (target);
+   machine_mode imode = GET_MODE_INNER (vmode);
+-  unsigned i, nelt = GET_MODE_NUNITS (vmode);
+-  unsigned nvar = 0;
+-  bool all_same = true;
+-  rtx x;
++  rtx same = XVECEXP (vals, 0, 0);
++  rtx temp, temp2;
+
+-  for (i = 0; i < nelt; ++i)
++  if (CONST_INT_P (same) && nvar == 0
++      && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
++    {
++      switch (vmode)
++	{
++	case E_V32QImode:
++	case E_V16HImode:
++	case E_V8SImode:
++	case E_V4DImode:
++	case E_V16QImode:
++	case E_V8HImode:
++	case E_V4SImode:
++	case E_V2DImode:
++	  temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
++	  emit_move_insn (target, temp);
++	  return;
++	default:
++	  gcc_unreachable ();
++	}
++    }
++  temp = gen_reg_rtx (imode);
++  if (imode == GET_MODE (same))
++    temp2 = same;
++  else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
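To make the target of these changes concrete: loongarch_expand_vector_init runs when a vector is built from scalar elements, for example through GCC's generic vector extensions. A small illustration of such a construction (our own example, not from the patch; with -mlasx the new merge/interleave paths above can apply to cases like this):

/* Build a 256-bit vector from scalar ints; on LoongArch with -mlasx
   this construction is expanded by loongarch_expand_vector_init.  */
typedef int v8si __attribute__ ((vector_size (32)));

v8si
build_v8si (int a, int b)
{
  return (v8si) { a, b, a, b, a, b, a, b };
}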
View file
_service:tar_scm:0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
Added
@@ -0,0 +1,51 @@
+From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 2 Oct 2023 18:51:00 +0800
+Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign
+ RTL
+
+When I added copysign support for LoongArch (r13-3702), we did not have
+a copysign RTL insn, so I had to use UNSPEC to represent the copysign
+instruction.  Now the copysign RTX code has been added in r14-1586, so
+this patch removes those UNSPECs, and it uses the native RTL copysign
+insn.
+
+Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign
+RTL" [1] from Michael Meissner.
+
+[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete.
+    (copysign<mode>3): Use copysign RTL instead of UNSPEC.
+---
+ gcc/config/loongarch/loongarch.md | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 63ff32e75..73e2cbe0b 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -37,7 +37,6 @@
+   UNSPEC_FCLASS
+   UNSPEC_FMAX
+   UNSPEC_FMIN
+-  UNSPEC_FCOPYSIGN
+   UNSPEC_FTINT
+   UNSPEC_FTINTRM
+   UNSPEC_FTINTRP
+@@ -1129,9 +1128,8 @@
+
+ (define_insn "copysign<mode>3"
+   [(set (match_operand:ANYF 0 "register_operand" "=f")
+-	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+-		      (match_operand:ANYF 2 "register_operand" "f")]
+-		     UNSPEC_FCOPYSIGN))]
++	(copysign:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		       (match_operand:ANYF 2 "register_operand" "f")))]
+   "TARGET_HARD_FLOAT"
+   "fcopysign.<fmt>\t%0,%1,%2"
+   [(set_attr "type" "fcopysign")
+--
+2.43.0
+
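The semantics are unchanged by this patch: only the RTL representation of the fcopysign.{s,d} instruction moves from an UNSPEC to the native copysign RTX code. As a reminder of the C-level behaviour this pattern implements, copysign (x, y) yields the magnitude of x with the sign bit of y (our example, not from the patch):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* The sign bit is copied verbatim, signed zeros included.  */
  printf ("%g\n", copysign (3.0, -0.0));  /* prints -3 */
  printf ("%g\n", copysign (-3.0, 1.0));  /* prints 3 */
  return 0;
}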
View file
_service:tar_scm:0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
Added
@@ -0,0 +1,71 @@
+From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 11 Oct 2023 17:59:53 +0800
+Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch
+ headers.
+
+gcc/ChangeLog:
+
+    * config.gcc: Add loongarch-driver.h to tm_files.
+    * config/loongarch/loongarch.h: Do not include loongarch-driver.h.
+    * config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
+    instead of $(TM_H) for building generator programs.
+---
+ gcc/config.gcc                   | 4 ++--
+ gcc/config/loongarch/loongarch.h | 3 ---
+ gcc/config/loongarch/t-loongarch | 3 ++-
+ 3 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e34a5fbb9..11ab620d0 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*)
+
+ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	extra_options="${extra_options} linux-android.opt"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+@@ -2521,7 +2521,7 @@ loongarch*-*-linux*)
+
+ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a443a6427..a2dc4ba8c 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3.  If not see
+
+ #define TARGET_LIBGCC_SDATA_SECTION ".sdata"
+
+-/* Driver native functions for SPEC processing in the GCC driver. */
+-#include "loongarch-driver.h"
+-
+ /* This definition replaces the formerly used 'm' constraint with a
+    different constraint letter in order to avoid changing semantics of
+    the 'm' constraint when accepting new address formats in
+diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
+index 28cfb49df..12734c37b 100644
+--- a/gcc/config/loongarch/t-loongarch
++++ b/gcc/config/loongarch/t-loongarch
+@@ -16,7 +16,8 @@
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+
+-TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
++
++GTM_H += loongarch-multilib.h
+ OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
+ 		   $(srcdir)/config/loongarch/loongarch-tune.h
+
+--
+2.43.0
+
View file
_service:tar_scm:0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
Added
@@ -0,0 +1,65 @@
+From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 25 Oct 2024 00:58:01 +0000
+Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424]
+
+gcc/testsuite/ChangeLog:
+
+    PR target/111424
+    * lib/target-supports.exp: Enable vect.exp for LoongArch.
+---
+ gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index 192e0aded..bbe145c1c 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } {
+ 	}
+     } elseif [istarget amdgcn-*-*] {
+ 	set dg-do-what-default run
++    } elseif [istarget loongarch*-*-*] {
++	lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
++	if [check_effective_target_loongarch_asx_hw] {
++	    set dg-do-what-default run
++	} else {
++	    set dg-do-what-default compile
++	}
+     } else {
+ 	return 0
+     }
+@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } {
+     return 1
+ }
+
++proc check_effective_target_loongarch_sx_hw { } {
++    return [check_runtime loongarch_sx_hw {
++	#include <lsxintrin.h>
++	int main (void)
++	{
++	    __m128i a, b, c;
++	    c = __lsx_vand_v (a, b);
++	    return 0;
++	}
++    } "-mlsx"]
++}
++
++proc check_effective_target_loongarch_asx_hw { } {
++    return [check_runtime loongarch_asx_hw {
++	#include <lasxintrin.h>
++	int main (void)
++	{
++	    __m256i a, b, c;
++	    c = __lasx_xvand_v (a, b);
++	    return 0;
++	}
++    } "-mlasx"]
++}
++
+ # Return 1 if the target does *not* require strict alignment.
+
+ proc check_effective_target_non_strict_align {} {
+--
+2.43.0
+
View file
_service:tar_scm:0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
Added
@@ -0,0 +1,48 @@
+From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 15 Sep 2023 11:56:01 +0800
+Subject: [PATCH 008/188] LoongArch: Delete macro definition
+ ASM_OUTPUT_ALIGN_WITH_NOP.
+
+There are two reasons for removing this macro definition:
+1. The default in the assembler is to use the nop instruction for filling.
+2. For assembly directives: .align abs-expr, abs-expr, abs-expr
+   The third expression it is the maximum number of bytes that should be
+   skipped by this alignment directive.
+   Therefore, it will affect the display of the specified alignment rules
+   and affect the operating efficiency.
+
+This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
+(Since the assembler will add nop based on the .align information when doing relax,
+it will cause the conditional branch to go out of bounds during the assembly process.
+This submission of binutils solves this problem.)
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
+    Delete.
+
+Co-authored-by: Chenghua Xu <xuchenghua@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a2dc4ba8c..572b538be 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1058,11 +1058,6 @@ typedef struct {
+
+ #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
+-   used for padding.  */
+-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
+-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
+-
+ /* This is how to output an assembler line to advance the location
+    counter by SIZE bytes.  */
+
+--
+2.43.0
+
View file
_service:tar_scm:0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
Added
@@ -0,0 +1,105 @@
+From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Wed, 11 Oct 2023 16:41:25 +0800
+Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid
+ ICE.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Following test code triggers unrecognized insn ICE on LoongArch target
+with "-O3 -mlasx":
+
+void
+foo (unsigned char *dst, unsigned char *src)
+{
+  for (int y = 0; y < 16; y++)
+    {
+      for (int x = 0; x < 16; x++)
+	dst[x] = src[x] + 1;
+      dst += 32;
+      src += 32;
+    }
+}
+
+ICE info:
+./test.c: In function ‘foo’:
+./test.c:8:1: error: unrecognizable insn:
+    8 | }
+      | ^
+(insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ])
+        (vec_concat:V32QI (reg:V16QI 186)
+            (const_vector:V16QI [
+                (const_int 0 [0]) repeated x16
+            ]))) "./test.c":4:19 -1
+     (nil))
+during RTL pass: vregs
+./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791
+0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108
+0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116
+0x120a03c5b extract_insn(rtx_insn*)
+	/home/panchenghui/upstream/gcc/gcc/recog.cc:2791
+0x12067ff73 instantiate_virtual_regs_in_insn
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1610
+0x12067ff73 instantiate_virtual_regs
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1983
+0x12067ff73 execute
+	/home/panchenghui/upstream/gcc/gcc/function.cc:2030
+
+This RTL is generated inside loongarch_expand_vector_group_init function (related
+to vec_initv32qiv16qi template). Original impl doesn't ensure all vec_concat arguments
+are register type. This patch adds force_reg() to the vec_concat argument generation.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
+    fix impl related to vec_initv32qiv16qi template to avoid ICE.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             |  3 ++-
+ .../loongarch/vector/lasx/lasx-vec-init-1.c   | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 760b12268..9a629a999 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
+ void
+ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ {
+-  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
++  rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
++		 force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
+   emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+ 						      ops[1])));
+ }
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+new file mode 100644
+index 000000000..28be32982
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++void
++foo (unsigned char *dst, unsigned char *src)
++{
++  for (int y = 0; y < 16; y++)
++    {
++      for (int x = 0; x < 16; x++)
++	dst[x] = src[x] + 1;
++      dst += 32;
++      src += 32;
++    }
++}
+--
+2.43.0
+
View file
_service:tar_scm:0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
Added
@@ -0,0 +1,35 @@
+From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 17 Oct 2023 21:55:05 +0800
+Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for
+ zeroing a fcc
+
+During the review of an LLVM change [1], on LA464 we found that zeroing
+an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0.
+
+[1]: https://github.com/llvm/llvm-project/pull/69300
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for
+    zeroing a fcc.
+---
+ gcc/config/loongarch/loongarch.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 73e2cbe0b..5f9e63d66 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2150,7 +2150,7 @@
+   [(set (match_operand:FCC 0 "register_operand" "=z")
+	(const_int 0))]
+   ""
+-  "movgr2cf\t%0,$r0")
++  "fcmp.caf.s\t%0,$f0,$f0")
+
+ ;; Conditional move instructions.
+
+--
+2.43.0
+
View file
_service:tar_scm:0011-LoongArch-Implement-avg-and-sad-standard-names.patch
Added
@@ -0,0 +1,389 @@
+From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:36:12 +0800
+Subject: [PATCH 011/188] LoongArch:Implement avg and sad standard names.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md
+    (avg<mode>3_ceil): New patterns.
+    (uavg<mode>3_ceil): Ditto.
+    (avg<mode>3_floor): Ditto.
+    (uavg<mode>3_floor): Ditto.
+    (usadv32qi): Ditto.
+    (ssadv32qi): Ditto.
+    * config/loongarch/lsx.md
+    (avg<mode>3_ceil): New patterns.
+    (uavg<mode>3_ceil): Ditto.
+    (avg<mode>3_floor): Ditto.
+    (uavg<mode>3_floor): Ditto.
+    (usadv16qi): Ditto.
+    (ssadv16qi): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/avg-ceil-lasx.c: New test.
+    * gcc.target/loongarch/avg-ceil-lsx.c: New test.
+    * gcc.target/loongarch/avg-floor-lasx.c: New test.
+    * gcc.target/loongarch/avg-floor-lsx.c: New test.
+    * gcc.target/loongarch/sad-lasx.c: New test.
+    * gcc.target/loongarch/sad-lsx.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 78 +++++++++++++++++++
+ gcc/config/loongarch/lsx.md                   | 78 +++++++++++++++++++
+ .../gcc.target/loongarch/avg-ceil-lasx.c      | 22 ++++++
+ .../gcc.target/loongarch/avg-ceil-lsx.c       | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lasx.c     | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lsx.c      | 22 ++++++
+ gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++
+ gcc/testsuite/gcc.target/loongarch/sad-lsx.c  | 20 +++++
+ 8 files changed, 284 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 2bc5d47ed..c7496d68a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5171,3 +5171,81 @@
+ 				      const0_rtx));
+   DONE;
+ })
++
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_s_<lasxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_u_<lasxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_s_<lasxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_u_<lasxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
++
++(define_expand "ssadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 075f6ba56..b4e92ae9c 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -3581,6 +3581,84 @@
+   DONE;
+ })
+
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_s_<lsxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_u_<lsxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_s_<lsxfmt> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_u_<lsxfmt_u> (operands[0],
++	     operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv16qi"
++  [(match_operand:V4SI 0 "register_operand")
++   (match_operand:V16QI 1 "register_operand")
++   (match_operand:V16QI 2 "register_operand")
++   (match_operand:V4SI 3 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  rtx t1 = gen_reg_rtx (V16QImode);
++  rtx t2 = gen_reg_rtx (V8HImode);
++  rtx t3 = gen_reg_rtx (V4SImode);
++  emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
View file
_service:tar_scm:0012-LoongArch-Implement-vec_widen-standard-names.patch
Added
@@ -0,0 +1,403 @@
+From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:39:40 +0800
+Subject: [PATCH 012/188] LoongArch: Implement vec_widen standard names.
+
+Add support for vec_widen lo/hi patterns.  These do not directly
+match on Loongarch lasx instructions but can be emulated with
+even/odd + vector merge.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md
+	(vec_widen_<su>mult_even_v8si): New patterns.
+	(vec_widen_<su>add_hi_<mode>): Ditto.
+	(vec_widen_<su>add_lo_<mode>): Ditto.
+	(vec_widen_<su>sub_hi_<mode>): Ditto.
+	(vec_widen_<su>sub_lo_<mode>): Ditto.
+	(vec_widen_<su>mult_hi_<mode>): Ditto.
+	(vec_widen_<su>mult_lo_<mode>): Ditto.
+	* config/loongarch/loongarch.md (u_bool): New iterator.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_vec_widen_hilo): New prototype.
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_vec_interleave): New function.
+	(loongarch_expand_vec_widen_hilo): New function.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-widen-add.c: New test.
+	* gcc.target/loongarch/vect-widen-mul.c: New test.
+	* gcc.target/loongarch/vect-widen-sub.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  |  82 ++++++++---
+ gcc/config/loongarch/loongarch-protos.h       |   1 +
+ gcc/config/loongarch/loongarch.cc             | 137 ++++++++++++++++++
+ gcc/config/loongarch/loongarch.md             |   2 +
+ .../gcc.target/loongarch/vect-widen-add.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-mul.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-sub.c     |  24 +++
+ 7 files changed, 277 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index c7496d68a..442fda246 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5048,23 +5048,71 @@
+   [(set_attr "type" "simd_store")
+    (set_attr "mode" "DI")])
+
+-(define_insn "vec_widen_<su>mult_even_v8si"
+-  [(set (match_operand:V4DI 0 "register_operand" "=f")
+-	(mult:V4DI
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 1 "register_operand" "%f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 2 "register_operand" "f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))))]
+-  "ISA_HAS_LASX"
+-  "xvmulwev.d.w<u>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "V4DI")])
++(define_expand "vec_widen_<su>add_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>add_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "mult");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "mult");
++  DONE;
++})
+
+ ;; Vector reduction operation
+ (define_expand "reduc_plus_scal_v4di"
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index ea61cf567..163162598 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void);
+ extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
+ extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
+						 rtx *);
++extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
+
+ /* Routines implemented in loongarch-c.c.  */
+ void loongarch_cpu_cpp_builtins (cpp_reader *);
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9a629a999..c0f58f9a9 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
+   return loongarch_expand_vec_perm_even_odd_1 (d, odd);
+ }
+
++static void
++loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
++{
++  struct expand_vec_perm_d d;
++  unsigned i, nelt, base;
++  bool ok;
++
++  d.target = target;
++  d.op0 = op0;
++  d.op1 = op1;
++  d.vmode = GET_MODE (target);
++  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
++  d.one_vector_p = false;
++  d.testing_p = false;
++
++  base = high_p ? nelt / 2 : 0;
++  for (i = 0; i < nelt / 2; ++i)
++    {
++      d.perm[i * 2] = i + base;
++      d.perm[i * 2 + 1] = i + base + nelt;
++    }
++
++  ok = loongarch_expand_vec_perm_interleave (&d);
++  gcc_assert (ok);
++}
++
++/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd
++   parts of the double sized result elements in the corresponding elements of
++   the target register.  That's NOT what the vec_widen_umult_lo/hi patterns are
++   expected to do.  We emulate the widening lo/hi multiplies with the even/odd
++   versions followed by a vector merge.  */
++
++void
++loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
++				 bool uns_p, bool high_p, const char *optab)
++{
++  machine_mode wmode = GET_MODE (dest);
++  machine_mode mode = GET_MODE (op1);
++  rtx t1, t2, t3;
++
++  t1 = gen_reg_rtx (wmode);
++  t2 = gen_reg_rtx (wmode);
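The key idea is stated in the comment above: xvmulwev/xvmulwod compute products of the even and odd source elements, while the vec_widen_*_lo/hi standard names want the widened results of the low and high input halves in element order, so an even/odd operation followed by an interleave recovers the expected layout. A scalar sketch of that recombination for 8 x int32 -> two halves of 4 x int64 (our own simplification, which ignores LASX's two-128-bit-lane register layout):

    #include <stdint.h>

    void
    widen_mult_hilo (const int32_t a[8], const int32_t b[8],
                     int64_t lo[4], int64_t hi[4])
    {
      int64_t even[4], odd[4];
      for (int i = 0; i < 4; i++)
        {
          even[i] = (int64_t) a[2 * i] * b[2 * i];          /* xvmulwev */
          odd[i]  = (int64_t) a[2 * i + 1] * b[2 * i + 1];  /* xvmulwod */
        }
      /* Interleave low halves for the _lo result, high halves for _hi.  */
      for (int i = 0; i < 2; i++)
        {
          lo[2 * i] = even[i];
          lo[2 * i + 1] = odd[i];
          hi[2 * i] = even[i + 2];
          hi[2 * i + 1] = odd[i + 2];
        }
    }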
View file
_service:tar_scm:0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
Added
@@ -0,0 +1,354 @@
+From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:43:39 +0800
+Subject: [PATCH 013/188] LoongArch: Implement the new vector cost model
+ framework.
+
+This patch makes LoongArch use the new vector hooks and implements the
+costing function determine_suggested_unroll_factor, so that it can
+suggest an unroll factor for a given loop being vectorized, based on
+the vec_ops analysis done during vector costing and the available issue
+information.  The approach follows the aarch64 and rs6000 ports.
+
+The patch also reduces the cost of unaligned stores, making it equal to
+the cost of aligned ones in order to avoid odd alignment peeling.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
+	vector_costs.  Add a constructor.
+	(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
+	adjust the cost for inner loops.
+	(loongarch_vector_costs::count_operations): New function.
+	(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
+	(loongarch_vector_costs::finish_cost): Ditto.
+	(loongarch_builtin_vectorization_cost): Adjust.
+	* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* config/loongarch/genopts/loongarch.opt.in
+	(loongarch-vect-unroll-limit): Ditto.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* doc/invoke.texi (loongarch-vect-unroll-limit): Document new option.
+---
+ gcc/config/loongarch/genopts/loongarch.opt.in |  15 +-
+ gcc/config/loongarch/loongarch.cc             | 173 ++++++++++++++++--
+ gcc/config/loongarch/loongarch.opt            |  15 +-
+ gcc/doc/invoke.texi                           |   7 +
+ 4 files changed, 188 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index f18733c24..74cf4a7f7 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -152,10 +152,6 @@ mbranch-cost=
+ Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+ -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
+
+-mmemvec-cost=
+-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
+--mmemvec-cost=COST	Set the cost of vector memory access instructions.
+-
+ mcheck-zero-division
+ Target Mask(CHECK_ZERO_DIV)
+ Trap on integer divide by zero.
+@@ -219,3 +215,14 @@ mrelax
+ Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+ Take advantage of linker relaxations to reduce the number of instructions
+ required to materialize symbol addresses.
++
++-param=loongarch-vect-unroll-limit=
++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
++Used to limit unroll factor which indicates how much the autovectorizer may
++unroll a loop.  The default value is 6.
++
++-param=loongarch-vect-issue-info=
++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
++Indicate how many non memory access vector instructions can be issued per
++cycle, it's used in unroll factor determination for autovectorizer.  The
++default value is 4.
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c0f58f9a9..e22a64600 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "rtl-iter.h"
+ #include "opts.h"
+ #include "function-abi.h"
++#include "cfgloop.h"
++#include "tree-vectorizer.h"
+
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+     }
+ }
+
+-/* Vectorizer cost model implementation.  */
+-
+ /* Implement targetm.vectorize.builtin_vectorization_cost.  */
+
+ static int
+@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+     case vector_load:
+     case vec_to_scalar:
+     case scalar_to_vec:
+-    case cond_branch_not_taken:
+-    case vec_promote_demote:
+     case scalar_store:
+     case vector_store:
+       return 1;
+
++    case vec_promote_demote:
+     case vec_perm:
+       return LASX_SUPPORTED_MODE_P (mode)
+	 && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
+
+     case unaligned_load:
+-    case vector_gather_load:
+-      return 2;
+-
+     case unaligned_store:
+-    case vector_scatter_store:
+-      return 10;
++      return 2;
+
+     case cond_branch_taken:
+-      return 3;
++      return 4;
++
++    case cond_branch_not_taken:
++      return 2;
+
+     case vec_construct:
+       elements = TYPE_VECTOR_SUBPARTS (vectype);
+-      return elements / 2 + 1;
++      if (ISA_HAS_LASX)
++	return elements + 1;
++      else
++	return elements;
+
+     default:
+       gcc_unreachable ();
+     }
+ }
+
++class loongarch_vector_costs : public vector_costs
++{
++public:
++  using vector_costs::vector_costs;
++
++  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
++			      stmt_vec_info stmt_info, slp_tree, tree vectype,
++			      int misalign,
++			      vect_cost_model_location where) override;
++  void finish_cost (const vector_costs *) override;
++
++protected:
++  void count_operations (vect_cost_for_stmt, stmt_vec_info,
++			 vect_cost_model_location, unsigned int);
++  unsigned int determine_suggested_unroll_factor (loop_vec_info);
++  /* The number of vectorized stmts in loop.  */
++  unsigned m_stmts = 0;
++  /* The number of load and store operations in loop.  */
++  unsigned m_loads = 0;
++  unsigned m_stores = 0;
++  /* Reduction factor for suggesting unroll factor.  */
++  unsigned m_reduc_factor = 0;
++  /* True if the loop contains an average operation.  */
++  bool m_has_avg = false;
++};
++
++/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
++static vector_costs *
++loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
++{
++  return new loongarch_vector_costs (vinfo, costing_for_scalar);
++}
++
++void
++loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
++					  stmt_vec_info stmt_info,
++					  vect_cost_model_location where,
++					  unsigned int count)
++{
++  if (!m_costing_for_scalar
++      && is_a<loop_vec_info> (m_vinfo)
++      && where == vect_body)
++    {
++      m_stmts += count;
++
++      if (kind == scalar_load
++	  || kind == vector_load
++	  || kind == unaligned_load)
++	m_loads += count;
++      else if (kind == scalar_store
++	       || kind == vector_store
++	       || kind == unaligned_store)
++	m_stores += count;
++      else if ((kind == scalar_stmt
++		|| kind == vector_stmt
++		|| kind == vec_to_scalar)
++	       && stmt_info && vect_is_reduction (stmt_info))
++	{
++	  tree lhs = gimple_get_lhs (stmt_info->stmt);
++	  unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
++	  m_reduc_factor = MAX (base * count, m_reduc_factor);
++	}
++    }
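determine_suggested_unroll_factor (truncated in this view) works from the counts gathered by count_operations: roughly, the fewer issue slots per cycle the loop body's non-memory vector operations occupy, the higher the suggested unroll, capped by loongarch-vect-unroll-limit. A deliberately simplified model of that arithmetic — ours, not the patch's exact code:

    /* Sketch: suggest unrolling until about issue_info non-memory vector
       ops per cycle are in flight, never exceeding the unroll limit.  */
    static unsigned
    suggested_unroll_factor (unsigned general_ops, unsigned issue_info,
                             unsigned unroll_limit)
    {
      if (general_ops == 0)
        return 1;
      unsigned uf = issue_info / general_ops;
      if (uf < 1)
        uf = 1;
      return uf < unroll_limit ? uf : unroll_limit;
    }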
View file
_service:tar_scm:0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
Added
@@ -0,0 +1,34 @@
+From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 23 Oct 2023 09:07:32 +0800
+Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE.
+
+LoongArch's microarchitecture maintains cache coherency in hardware.
+Because of out-of-order execution, an "ibar" is still required so that
+stores executed by this CPU before the "ibar" (which may have
+invalidated icache contents) become visible to instruction fetch.
+"ibar" does not itself invalidate the icache, so the start and end
+parameters do not affect "ibar" performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 572b538be..f7ddfc452 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function
+   (TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0)
+
+ #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
++
++/* LoongArch maintains ICache/DCache coherency by hardware,
++   we just need "ibar" to avoid instruction hazard here.  */
++#undef CLEAR_INSN_CACHE
++#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
+-- 
+2.43.0
+
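With CLEAR_INSN_CACHE defined, libgcc's __clear_cache — and therefore the __builtin___clear_cache builtin — reduce to a single ibar on LoongArch. A typical caller looks like this (illustrative sketch; the buffer contents stand in for freshly written machine code):

    #include <string.h>

    void
    publish_code (void *buf, const void *insns, unsigned len)
    {
      memcpy (buf, insns, len);
      /* Becomes "ibar 0" with the definition above; the begin/end
         arguments are ignored by the expansion.  */
      __builtin___clear_cache ((char *) buf, (char *) buf + len);
    }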
View file
_service:tar_scm:0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
Added
@@ -0,0 +1,233 @@
+From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 23 Oct 2023 15:23:11 +0800
+Subject: [PATCH 015/188] LoongArch: Add enum-style -mexplicit-relocs= option
+
+To strike a better balance between scheduling and relaxation when -flto
+is enabled, add three-way -mexplicit-relocs={auto,none,always} options.
+The old -mexplicit-relocs and -mno-explicit-relocs options are still
+supported; they are mapped to -mexplicit-relocs=always and
+-mexplicit-relocs=none.
+
+The default choice is determined by probing assembler capabilities at
+build time.  If the assembler does not support explicit relocs at all,
+the default will be none; if it supports explicit relocs but not
+relaxation, the default will be always; if both explicit relocs and
+relaxation are supported, the default will be auto.
+
+Currently auto is the same as none.  We will make auto smarter in the
+following changes.
+
+gcc/ChangeLog:
+
+	* config/loongarch/genopts/loongarch-strings: Add strings for
+	-mexplicit-relocs={auto,none,always}.
+	* config/loongarch/genopts/loongarch.opt.in: Add options for
+	-mexplicit-relocs={auto,none,always}.
+	* config/loongarch/loongarch-str.h: Regenerate.
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch-def.h
+	(EXPLICIT_RELOCS_AUTO): Define.
+	(EXPLICIT_RELOCS_NONE): Define.
+	(EXPLICIT_RELOCS_ALWAYS): Define.
+	(N_EXPLICIT_RELOCS_TYPES): Define.
+	* config/loongarch/loongarch.cc
+	(loongarch_option_override_internal): Error out if the old-style
+	-mno-explicit-relocs option is used with
+	-mexplicit-relocs={auto,none,always} together.  Map
+	-mno-explicit-relocs to -mexplicit-relocs=none and
+	-mexplicit-relocs to -mexplicit-relocs=always for backward
+	compatibility.  Set a proper default for -mexplicit-relocs=
+	based on configure-time probed linker capability.  Update a
+	diagnostic message to mention -mexplicit-relocs=always instead
+	of the old-style -mexplicit-relocs.
+	(loongarch_handle_model_attribute): Update a diagnostic message
+	to mention -mexplicit-relocs=always instead of the old-style
+	-mexplicit-relocs.
+	* config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define.
+--- + .../loongarch/genopts/loongarch-strings | 6 +++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++-- + gcc/config/loongarch/loongarch-def.h | 6 +++++ + gcc/config/loongarch/loongarch-str.h | 5 ++++ + gcc/config/loongarch/loongarch.cc | 24 +++++++++++++++++-- + gcc/config/loongarch/loongarch.h | 3 +++ + gcc/config/loongarch/loongarch.opt | 21 ++++++++++++++-- + 7 files changed, 80 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index eb5086fe3..6c8a42af2 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -65,3 +65,9 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme ++ ++# -mexplicit-relocs ++OPTSTR_EXPLICIT_RELOCS explicit-relocs ++STR_EXPLICIT_RELOCS_AUTO auto ++STR_EXPLICIT_RELOCS_NONE none ++STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 74cf4a7f7..e7df1964a 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -176,10 +176,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index eb8e53b20..4757de14b 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings; + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 + ++/* enum explicit_relocs */ ++#define EXPLICIT_RELOCS_AUTO 0 ++#define EXPLICIT_RELOCS_NONE 1 ++#define EXPLICIT_RELOCS_ALWAYS 2 ++#define N_EXPLICIT_RELOCS_TYPES 3 ++ + /* The common default value for variables whose assignments + are triggered by command-line options. */ + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index ecfebf9db..037e9e583 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + ++#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" ++#define STR_EXPLICIT_RELOCS_AUTO "auto" ++#define STR_EXPLICIT_RELOCS_NONE "none" ++#define STR_EXPLICIT_RELOCS_ALWAYS "always" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e22a64600..3258c8655 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + ++ if (la_opt_explicit_relocs != M_OPT_UNSET ++ && la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ error ("do not use %qs (with %qs) and %qs (without %qs) together", ++ "-mexplicit-relocs=", "=", ++ la_opt_explicit_relocs_backward ? "-mexplicit-relocs" ++ : "-mno-explicit-relocs", "="); ++ ++ if (la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ la_opt_explicit_relocs = (la_opt_explicit_relocs_backward ++ ? EXPLICIT_RELOCS_ALWAYS ++ : EXPLICIT_RELOCS_NONE); ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (HAVE_AS_MRELAX_OPTION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + case CMODEL_EXTREME: + if (!TARGET_EXPLICIT_RELOCS) + error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs"); ++ "extreme", "-mexplicit-relocs=always"); + + if (opts->x_flag_plt) + { +@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + if (!TARGET_EXPLICIT_RELOCS) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, "-mexplicit-relocs"); ++ "%qE attribute requires %s", name, ++ "-mexplicit-relocs=always"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index f7ddfc452..6e8ac293a 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function + we just need "ibar" to avoid instruction hazard here. */ + #undef CLEAR_INSN_CACHE + #define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0) ++ ++#define TARGET_EXPLICIT_RELOCS \ ++ (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 34bd832bd..44376fd77 100644 +--- a/gcc/config/loongarch/loongarch.opt
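For readers not fluent in LoongArch relocation syntax: the option controls whether GCC emits assembler pseudo-ops (which the assembler may expand or relax) or the underlying machine instructions with explicit relocation operators (which GCC itself can schedule). A hedged sketch of the difference for a GOT load — the exact output depends on code model and PIC-ness:

    extern int x;

    int
    load_x (void)
    {
      return x;
      /* -mexplicit-relocs=none (pseudo-op, assembler-expanded):
             la.global $t0, x
             ld.w      $a0, $t0, 0
         -mexplicit-relocs=always (machine insns, schedulable):
             pcalau12i $t0, %got_pc_hi20(x)
             ld.d      $t0, $t0, %got_pc_lo12(x)
             ld.w      $a0, $t0, 0
         auto chooses per case, as the following patches refine.  */
    }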
View file
_service:tar_scm:0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
Added
@@ -0,0 +1,212 @@ +From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 30 Sep 2023 18:46:28 +0800 +Subject: PATCH 016/188 LoongArch: Use explicit relocs for GOT access when + -mexplicit-relocs=auto and LTO during a final link with linker plugin + +If we are performing LTO for a final link and linker plugin is enabled, +then we are sure any GOT access may resolve to a symbol out of the link +unit (otherwise the linker plugin will tell us the symbol should be +resolved locally and we'll use PC-relative access instead). + +Produce machine instructions with explicit relocs instead of la.global +for better scheduling. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_explicit_relocs_p): Declare new function. + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Implement. + (loongarch_symbol_insns): Call loongarch_explicit_relocs_p for + SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS. + (loongarch_split_symbol): Call loongarch_explicit_relocs_p for + deciding if return early, instead of using + TARGET_EXPLICIT_RELOCS. + (loongarch_output_move): CAll loongarch_explicit_relocs_p + instead of using TARGET_EXPLICIT_RELOCS. + * config/loongarch/loongarch.md (*low<mode>): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + (@ld_from_got<mode>): Likewise. + * config/loongarch/predicates.md (move_operand): Call + loongarch_explicit_relocs_p instead of using + TARGET_EXPLICIT_RELOCS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-lto.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 34 +++++++++++++++---- + gcc/config/loongarch/loongarch.md | 4 +-- + gcc/config/loongarch/predicates.md | 8 ++--- + .../loongarch/explicit-relocs-auto-lto.c | 26 ++++++++++++++ + 5 files changed, 59 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 163162598..51d38177b 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3258c8655..1d20577e7 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + gcc_unreachable (); + } + ++/* If -mexplicit-relocs=auto, we use machine operations with reloc hints ++ for cases where the linker is unable to relax so we can schedule the ++ machine operations, otherwise use an assembler pseudo-op so the ++ assembler will generate R_LARCH_RELAX. 
*/ ++ ++bool ++loongarch_explicit_relocs_p (enum loongarch_symbol_type type) ++{ ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) ++ return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; ++ ++ /* If we are performing LTO for a final link, and we have the linker ++ plugin so we know the resolution of the symbols, then all GOT ++ references are binding to external symbols or preemptable symbols. ++ So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin) ++ && type == SYMBOL_GOT_DISP); ++} ++ + /* Returns the number of instructions necessary to reference a symbol. */ + + static int +@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + case SYMBOL_GOT_DISP: + /* The constant will have to be loaded from the GOT before it + is used in an address. */ +- if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE) ++ if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE) + return 0; + + return 3; +@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + If so, and if LOW_OUT is nonnull, emit the high part and store the + low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. + +- Return false if build with '-mno-explicit-relocs'. ++ Return false if build with '-mexplicit-relocs=none'. + + TEMP is as for loongarch_force_temporary and is used to load the high + part into a register. +@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + { + enum loongarch_symbol_type symbol_type; + +- /* If build with '-mno-explicit-relocs', don't split symbol. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE) + || !loongarch_symbolic_constant_p (addr, &symbol_type) ++ || !loongarch_explicit_relocs_p (symbol_type) + || loongarch_symbol_insns (symbol_type, mode) == 0 + || !loongarch_split_symbol_type (symbol_type)) + return false; +@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src) + } + } + +- if (!TARGET_EXPLICIT_RELOCS ++ if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src)) + && dest_code == REG && symbolic_operand (src, VOIDmode)) + { + if (loongarch_classify_symbol (src) == SYMBOL_PCREL) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 29ac950bf..81c97393b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2247,7 +2247,7 @@ + (set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" " r") + (match_operand:P 2 "symbolic_operand" ""))) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +@@ -2275,7 +2275,7 @@ + (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand"))) + UNSPEC_LOAD_FROM_GOT)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "ld.<d>\t%0,%1,%L2" + (set_attr "type" "move") + ) +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index ad6cee5c4..6b50b3a4d 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -541,16 +541,14 @@ + case SYMBOL_REF: + case LABEL_REF: + return (loongarch_symbolic_constant_p (op, &symbol_type) +- && (!TARGET_EXPLICIT_RELOCS ++ && (!loongarch_explicit_relocs_p (symbol_type) + || !loongarch_split_symbol_type 
(symbol_type))); + + case HIGH: +- /* '-mno-explicit-relocs' don't generate high/low pairs. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + op = XEXP (op, 0); ++ + return (loongarch_symbolic_constant_p (op, &symbol_type) ++ && loongarch_explicit_relocs_p (symbol_type) + && loongarch_split_symbol_type (symbol_type)); + + default: +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +new file mode 100644 +index 000000000..f53b54689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +@@ -0,0 +1,26 @@ ++/* { dg-do link } */ ++/* { dg-require-effective-target lto } */ ++/* { dg-require-linker-plugin "" } */ ++/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */ ++ ++int pcrel __attribute__ ((visibility ("hidden"))); ++int got __attribute__ ((visibility ("default"))); ++ ++int ++*addr_pcrel (void) ++{ ++ return &pcrel; ++} ++ ++int ++*addr_got (void)
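The test (truncated above) contrasts a hidden-visibility symbol, which binds locally and can be addressed pc-relatively, with a default-visibility one, which may be preempted and must therefore go through the GOT even under LTO with the linker plugin. In outline — a sketch mirroring the test, not the file itself:

    int pcrel __attribute__ ((visibility ("hidden")));  /* %pc_hi20 pair */
    int got __attribute__ ((visibility ("default")));   /* %got_pc_hi20 pair */

    int *
    addr_pcrel (void)
    {
      return &pcrel;
    }

    int *
    addr_got (void)
    {
      return &got;
    }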
View file
_service:tar_scm:0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
Added
@@ -0,0 +1,146 @@ +From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 2 Oct 2023 13:00:18 +0800 +Subject: PATCH 017/188 LoongArch: Use explicit relocs for TLS access with + -mexplicit-relocs=auto + +The linker does not know how to relax TLS access for LoongArch, so let's +emit machine instructions with explicit relocs for TLS. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for TLS symbol types if -mexplicit-relocs=auto. + (loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS + with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@tls_low<mode>): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New + test. + * gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New + test. +--- + gcc/config/loongarch/loongarch.cc | 37 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 2 +- + .../explicit-relocs-auto-tls-ld-gd.c | 9 +++++ + .../explicit-relocs-auto-tls-le-ie.c | 6 +++ + 4 files changed, 40 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1d20577e7..fa5c14be6 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + +- /* If we are performing LTO for a final link, and we have the linker +- plugin so we know the resolution of the symbols, then all GOT +- references are binding to external symbols or preemptable symbols. +- So the linker cannot relax them. */ +- return (in_lto_p +- && !flag_incremental_link +- && HAVE_LTO_PLUGIN == 2 +- && (!global_options_set.x_flag_use_linker_plugin +- || global_options.x_flag_use_linker_plugin) +- && type == SYMBOL_GOT_DISP); ++ switch (type) ++ { ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLS_LE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ /* The linker don't know how to relax TLS accesses. */ ++ return true; ++ case SYMBOL_GOT_DISP: ++ /* If we are performing LTO for a final link, and we have the ++ linker plugin so we know the resolution of the symbols, then ++ all GOT references are binding to external symbols or ++ preemptable symbols. So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin)); ++ default: ++ return false; ++ } + } + + /* Returns the number of instructions necessary to reference a symbol. */ +@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc) + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); +@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 81c97393b..3b836d535 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2257,7 +2257,7 @@ + (unspec:P (mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand" ""))) + UNSPEC_TLS_LOW)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..957ff98df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +new file mode 100644 +index 000000000..78898cfc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto" } */ ++ ++#include "explicit-relocs-auto-tls-ld-gd.c" ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +-- +2.43.0 +
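The four TLS symbol types handled above correspond to the standard TLS access models; which one a variable gets depends on PIC-ness and whether it binds locally, as the two new tests exercise. A compile-only sketch:

    /* Without -fPIC: local-exec for the local symbol, initial-exec for
       the external one (SYMBOL_TLS_LE / SYMBOL_TLS_IE).  With -fPIC the
       same code uses local-dynamic / general-dynamic (SYMBOL_TLSLDM /
       SYMBOL_TLSGD) — all now emitted with explicit relocs.  */
    __thread int t_local;
    extern __thread int t_extern;

    int
    tls_sum (void)
    {
      return t_local + t_extern;
    }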
View file
_service:tar_scm:0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
Added
@@ -0,0 +1,245 @@
+From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 1 Oct 2023 11:14:29 +0800
+Subject: [PATCH 018/188] LoongArch: Use explicit relocs for addresses only
+ used for one load or store with -mexplicit-relocs=auto and
+ -mcmodel={normal,medium}
+
+In these cases, if we use explicit relocs, we end up with 2
+instructions:
+
+    pcalau12i t0, %pc_hi20(x)
+    ld.d      t0, t0, %pc_lo12(x)
+
+If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB
+range) we still have 2 instructions:
+
+    pcaddi t0, %pcrel_20(x)
+    ld.d   t0, t0, 0
+
+If x is out of the range we'll have 3 instructions.  So for these cases
+just emit machine instructions with explicit relocs.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (symbolic_pcrel_operand): New
+	predicate.
+	* config/loongarch/loongarch.md (define_peephole2): Optimize
+	la.local + ld/st to pcalau12i + ld/st if the address is only used
+	once if -mexplicit-relocs=auto and -mcmodel=normal or medium.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store.c:
+	New test.
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c:
+	New test.
+---
+ gcc/config/loongarch/loongarch.md             | 122 ++++++++++++++++++
+ gcc/config/loongarch/predicates.md            |   7 +
+ ...-relocs-auto-single-load-store-no-anchor.c |   6 +
+ .../explicit-relocs-auto-single-load-store.c  |  14 ++
+ 4 files changed, 149 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 3b836d535..c4c6baa60 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -65,6 +65,7 @@
+
+   UNSPEC_LOAD_FROM_GOT
+   UNSPEC_PCALAU12I
++  UNSPEC_PCALAU12I_GR
+   UNSPEC_ORI_L_LO12
+   UNSPEC_LUI_L_HI20
+   UNSPEC_LUI_H_LO20
+@@ -2297,6 +2298,16 @@
+   "pcalau12i\t%0,%%pc_hi20(%1)"
+   [(set_attr "type" "move")])
+
++;; @pcalau12i may be used for sibcall so it has a strict constraint.  This
++;; allows any general register as the operand.
++(define_insn "@pcalau12i_gr<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++	(unspec:P [(match_operand:P 1 "symbolic_operand" "")]
++		  UNSPEC_PCALAU12I_GR))]
++  ""
++  "pcalau12i\t%0,%%pc_hi20(%1)"
++  [(set_attr "type" "move")])
++
+ (define_insn "@ori_l_lo12<mode>"
+   [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec:P [(match_operand:P 1 "register_operand" "r")
+@@ -3748,6 +3759,117 @@
+   [(set_attr "type" "unknown")
+    (set_attr "mode" "<MODE>")])
+
++;; With normal or medium code models, if the only use of a pc-relative
++;; address is for loading or storing a value, then relying on linker
++;; relaxation is not better than emitting the machine instruction directly.
++;; Even if the la.local pseudo op can be relaxed, we get:
++;;
++;;     pcaddi $t0, %pcrel_20(x)
++;;     ld.d   $t0, $t0, 0
++;;
++;; There are still two instructions, same as using the machine instructions
++;; and explicit relocs:
++;;
++;;     pcalau12i $t0, %pc_hi20(x)
++;;     ld.d      $t0, $t0, %pc_lo12(x)
++;;
++;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
++;; 3 instructions).
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(mem:GPR (match_dup 0)))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(mem:GPR (plus (match_dup 0)
++		       (match_operand 3 "const_int_operand"))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(any_extend:GPR (mem:SUBDI (match_dup 0))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2)
++	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
++					     (match_dup 1)))))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (match_operand:GPR 2 "register_operand")
++	(any_extend:GPR
++	  (mem:SUBDI (plus (match_dup 0)
++			   (match_operand 3 "const_int_operand")))))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0]) \
++       || REGNO (operands[0]) == REGNO (operands[2]))"
++  [(set (match_dup 2)
++	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
++					     (match_dup 1)))))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (mem:QHWD (match_dup 0))
++	(match_operand:QHWD 2 "register_operand"))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0])) \
++   && REGNO (operands[0]) != REGNO (operands[2])"
++  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  {
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:P 0 "register_operand")
++	(match_operand:P 1 "symbolic_pcrel_operand"))
++   (set (mem:QHWD (plus (match_dup 0)
++			(match_operand 3 "const_int_operand")))
++	(match_operand:QHWD 2 "register_operand"))]
++  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
++   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
++   && (peep2_reg_dead_p (2, operands[0])) \
++   && REGNO (operands[0]) != REGNO (operands[2])"
++  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  {
++    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
++    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
++  })
++
+ ;; Synchronization instructions.
+
+ (include "sync.md")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 6b50b3a4d..1d669f560 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -563,6 +563,13 @@
+   return loongarch_symbolic_constant_p (op, &type);
+ })
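The net effect of the peepholes, shown on a single global load (illustrative; register allocation may differ):

    extern int counter;

    int
    read_counter (void)
    {
      return counter;
      /* before:  la.local  $t0, counter           (pseudo-op)
                  ld.w      $a0, $t0, 0
         after:   pcalau12i $t0, %pc_hi20(counter)
                  ld.w      $a0, $t0, %pc_lo12(counter)
         Two instructions either way in the best case, one fewer than the
         unrelaxed pseudo-op case, and now visible to the scheduler.  */
    }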
View file
_service:tar_scm:0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
Added
@@ -0,0 +1,84 @@
+From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Tue, 24 Oct 2023 14:40:14 +0800
+Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for
+ TLS.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (get_thread_pointer<mode>): Add the
+	instruction template corresponding to the __builtin_thread_pointer
+	function.
+	* doc/extend.texi: Add the __builtin_thread_pointer function support
+	description to the documentation.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/builtin_thread_pointer.c: New test.
+---
+ gcc/config/loongarch/loongarch.md                  |  7 +++++++
+ gcc/doc/extend.texi                                |  5 +++++
+ .../gcc.target/loongarch/builtin_thread_pointer.c  | 10 ++++++++++
+ 3 files changed, 22 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index c4c6baa60..80487488d 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -113,6 +113,7 @@
+
+ (define_constants
+   [(RETURN_ADDR_REGNUM		1)
++   (TP_REGNUM			2)
+    (T0_REGNUM			12)
+    (T1_REGNUM			13)
+    (S0_REGNUM			23)
+@@ -3647,6 +3648,12 @@
+    (set_attr "length" "0")
+    (set_attr "type" "ghost")])
+
++;; Named pattern for expanding thread pointer reference.
++(define_expand "get_thread_pointer<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++	(reg:P TP_REGNUM))]
++  "HAVE_AS_TLS"
++  {})
+
+ (define_split
+   [(match_operand 0 "small_data_pattern")]
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 1d1bac255..497c6de5f 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}.
+ void __break (imm0_32767)
+ @end smallexample
+
++Returns the value that is currently set in the @samp{tp} register.
++@smallexample
++ void * __builtin_thread_pointer (void)
++@end smallexample
++
+ @node MIPS DSP Built-in Functions
+ @subsection MIPS DSP Built-in Functions
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+new file mode 100644
+index 000000000..541e3b143
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target tls_native } */
++/* { dg-options "-O2" } */
++/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */
++
++void *
++get_tp ()
++{
++  return __builtin_thread_pointer ();
++}
+-- 
+2.43.0
+
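Beyond the included test, a hedged example of how the builtin might be used — the offset is a hypothetical layout constant for illustration, not anything defined by the patch:

    #define PER_THREAD_OFFSET 0x100  /* hypothetical TP-relative offset */

    void *
    per_thread_area (void)
    {
      char *tp = (char *) __builtin_thread_pointer ();
      return tp + PER_THREAD_OFFSET;  /* address of a per-thread block */
    }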
View file
_service:tar_scm:0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
Added
@@ -0,0 +1,189 @@ +From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Mon, 23 Oct 2023 10:13:24 +0800 +Subject: PATCH 020/188 LoongArch: Fix vfrint-releated comments in + lsxintrin.h and lasxintrin.h + +The comment of vfrint-related intrinsic functions does not match the return +value type in definition. This patch fixes these comments. + +gcc/ChangeLog: + + * config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments. + (__lasx_xvfrintrne_s): Ditto. + (__lasx_xvfrintrne_d): Ditto. + (__lasx_xvfrintrz_s): Ditto. + (__lasx_xvfrintrz_d): Ditto. + (__lasx_xvfrintrp_s): Ditto. + (__lasx_xvfrintrp_d): Ditto. + (__lasx_xvfrintrm_s): Ditto. + (__lasx_xvfrintrm_d): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto. + (__lsx_vfrintrne_s): Ditto. + (__lsx_vfrintrne_d): Ditto. + (__lsx_vfrintrz_s): Ditto. + (__lsx_vfrintrz_d): Ditto. + (__lsx_vfrintrp_s): Ditto. + (__lsx_vfrintrp_d): Ditto. + (__lsx_vfrintrm_s): Ditto. + (__lsx_vfrintrm_d): Ditto. +--- + gcc/config/loongarch/lasxintrin.h | 16 ++++++++-------- + gcc/config/loongarch/lsxintrin.h | 16 ++++++++-------- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index d39379927..7bce2c757 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrne_s (__m256 _1) + { +@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrne_d (__m256d _1) + { +@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrz_s (__m256 _1) + { +@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrz_d (__m256d _1) + { +@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrp_s (__m256 _1) + { +@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrp_d (__m256d _1) + { +@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrm_s (__m256 _1) + { +@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrm_d (__m256d _1) + { +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index ec4206990..29553c093 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrne_s (__m128 _1) + { +@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrne_d (__m128d _1) + { +@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrz_s (__m128 _1) + { +@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrz_d (__m128d _1) + { +@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrp_s (__m128 _1) + { +@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrp_d (__m128d _1) + { +@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrm_s (__m128 _1) + { +@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrm_d (__m128d _1) + { +-- +2.43.0 +
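The distinction the fixed comments encode: vfrint* rounds to an integral value but stays in the floating-point domain, while the vftint* family converts to integer vectors — hence V4SF, V4SF rather than V4SI, V4SF. A small usage sketch (compile with -mlsx; assumes the usual LSX intrinsic set is available):

    #include <lsxintrin.h>

    __m128
    round_nearest_even (__m128 v)
    {
      return __lsx_vfrintrne_s (v);    /* still a float vector */
    }

    __m128i
    convert_nearest_even (__m128 v)
    {
      return __lsx_vftintrne_w_s (v);  /* now an integer vector */
    }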
View file
_service:tar_scm:0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
Added
@@ -0,0 +1,418 @@ +From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 26 Oct 2023 09:34:32 +0800 +Subject: PATCH 021/188 LoongArch:Enable vcond_mask_mn expanders for SF/DF + modes. + +If the vcond_mask patterns don't support fp modes, the vector +FP comparison instructions will not be generated. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vcond_mask_<ILASX:mode><ILASX:mode>): Change to + (vcond_mask_<mode><mode256_i>): this. + * config/loongarch/lsx.md (vcond_mask_<ILSX:mode><ILSX:mode>): Change to + (vcond_mask_<mode><mode_i>): this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test. +--- + gcc/config/loongarch/lasx.md | 14 +-- + gcc/config/loongarch/lsx.md | 14 +-- + .../loongarch/vector/lasx/lasx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 87 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-2.c | 87 +++++++++++++++++++ + 6 files changed, 316 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 442fda246..f0f2dd08d 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -906,15 +906,15 @@ + }) + + ;; Same as vcond_ +-(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>" +- (match_operand:ILASX 0 "register_operand") +- (match_operand:ILASX 1 "reg_or_m1_operand") +- (match_operand:ILASX 2 "reg_or_0_operand") +- (match_operand:ILASX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode256_i>" ++ (match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "reg_or_m1_operand") ++ (match_operand:LASX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE256> 3 "register_operand") + "ISA_HAS_LASX" + { +- loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode, +- <ILASX:VIMODE256>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE256>mode, operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index b4e92ae9c..4af32c8df 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -644,15 +644,15 @@ + DONE; + }) + +-(define_expand "vcond_mask_<ILSX:mode><ILSX:mode>" +- (match_operand:ILSX 0 "register_operand") +- (match_operand:ILSX 1 "reg_or_m1_operand") +- (match_operand:ILSX 2 "reg_or_0_operand") +- (match_operand:ILSX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode_i>" ++ (match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "reg_or_m1_operand") ++ (match_operand:LSX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE> 3 "register_operand") + "ISA_HAS_LSX" + { +- loongarch_expand_vec_cond_mask_expr (<ILSX:MODE>mode, +- <ILSX:VIMODE>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE>mode, operands); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+new file mode 100644
+index 000000000..ee9cb1a1f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+@@ -0,0 +1,64 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */
++
++#include <stdint-gcc.h>
++
++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
++  void __attribute__ ((noinline, noclone)) \
++  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
++                                   DATA_TYPE *__restrict__ x, \
++                                   DATA_TYPE *__restrict__ y, \
++                                   CMP_TYPE *__restrict__ a, \
++                                   CMP_TYPE *__restrict__ b, \
++                                   int n) \
++  { \
++    for (int i = 0; i < n; i++) \
++      { \
++        DATA_TYPE xval = x[i], yval = y[i]; \
++        CMP_TYPE aval = a[i], bval = b[i]; \
++        r[i] = aval COND bval ? xval : yval; \
++      } \
++  }
++
++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
++  T (int8_t, int8_t, COND, SUFFIX) \
++  T (int16_t, int16_t, COND, SUFFIX) \
++  T (int32_t, int32_t, COND, SUFFIX) \
++  T (int64_t, int64_t, COND, SUFFIX) \
++  T (float, int32_t, COND, SUFFIX##_float) \
++  T (double, int64_t, COND, SUFFIX##_double)
++
++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
++  T (uint8_t, uint8_t, COND, SUFFIX) \
++  T (uint16_t, uint16_t, COND, SUFFIX) \
++  T (uint32_t, uint32_t, COND, SUFFIX) \
++  T (uint64_t, uint64_t, COND, SUFFIX) \
++  T (float, uint32_t, COND, SUFFIX##_float) \
++  T (double, uint64_t, COND, SUFFIX##_double)
++
++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
++  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \
++  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
++
++#define TEST_VAR_ALL(T) \
++  TEST_COND_VAR_ALL (T, >, _gt) \
++  TEST_COND_VAR_ALL (T, <, _lt) \
++  TEST_COND_VAR_ALL (T, >=, _ge) \
++  TEST_COND_VAR_ALL (T, <=, _le) \
++  TEST_COND_VAR_ALL (T, ==, _eq) \
++  TEST_COND_VAR_ALL (T, !=, _ne)
++
++TEST_VAR_ALL (DEF_VCOND_VAR)
++
++/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+new file mode 100644
+index 000000000..5f40ed44c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+@@ -0,0 +1,87 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlasx" } */
++
++#include <stdint-gcc.h>
++
++#define eq(A, B) ((A) == (B))
++#define ne(A, B) ((A) != (B))
++#define olt(A, B) ((A) < (B))
++#define ole(A, B) ((A) <= (B))
++#define oge(A, B) ((A) >= (B))
++#define ogt(A, B) ((A) > (B))
++#define ordered(A, B) (!__builtin_isunordered (A, B))
++#define unordered(A, B) (__builtin_isunordered (A, B))
++#define ueq(A, B) (!__builtin_islessgreater (A, B))
++#define ult(A, B) (__builtin_isless (A, B))
++#define ule(A, B) (__builtin_islessequal (A, B))
++#define uge(A, B)
(__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 *restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \
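A minimal sketch (not part of the patch) of the kind of loop these expanders unlock: once vcond_mask covers the SF/DF modes, a float compare-and-select loop like the one below can be vectorized into vfcmp/xvfcmp plus a bit-select under -mlsx/-mlasx.

/* Sketch only; mirrors the shape of the new lasx-vcond-1.c tests.  */
void
select_max (float *__restrict r, const float *__restrict a,
            const float *__restrict b, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = a[i] >= b[i] ? a[i] : b[i];
}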
View file
_service:tar_scm:0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
Added
@@ -0,0 +1,34 @@
+From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 19:39:27 +0800
+Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined
+ [PR112299]
+
+Now that loongarch.md uses HAVE_AS_TLS, we need this to fix the failure of
+building a cross compiler if the cross assembler is not installed yet.
+
+gcc/ChangeLog:
+
+    PR target/112299
+    * config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
+    if not defined yet.
+---
+ gcc/config/loongarch/loongarch-opts.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index f2b59abe6..c4975af00 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+ #define HAVE_AS_MRELAX_OPTION 0
+ #endif
+
++#ifndef HAVE_AS_TLS
++#define HAVE_AS_TLS 0
++#endif
++
+ #endif /* LOONGARCH_OPTS_H */
+--
+2.43.0
+
View file
_service:tar_scm:0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
Added
@@ -0,0 +1,30 @@
+From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 3 Nov 2023 17:01:36 +0800
+Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in
+ lsx_vreplgr2vr_<lsxfmt_f> template
+
+gcc/ChangeLog:
+
+    * config/loongarch/lsx.md: Fix instruction name typo in
+    lsx_vreplgr2vr_<lsxfmt_f> template.
+---
+ gcc/config/loongarch/lsx.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 4af32c8df..55c7d79a0 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -1523,7 +1523,7 @@
+   "ISA_HAS_LSX"
+ {
+   if (which_alternative == 1)
+-    return "ldi.<lsxfmt>\t%w0,0";
++    return "vldi.<lsxfmt>\t%w0,0";
+
+   if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+     return "#";
+--
+2.43.0
+
View file
_service:tar_scm:0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
Added
@@ -0,0 +1,116 @@
+From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 12 Nov 2023 00:55:13 +0800
+Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of
+ gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476]
+
+The GCC internals documentation says:
+
+    'subreg's of 'subreg's are not supported.  Using
+    'simplify_gen_subreg' is the recommended way to avoid this problem.
+
+Unfortunately loongarch_expand_vec_cond_mask_expr might create a nested
+subreg under certain circumstances, causing an ICE.
+
+Use simplify_gen_subreg as the internals documentation suggests.
+
+gcc/ChangeLog:
+
+    PR target/112476
+    * config/loongarch/loongarch.cc
+    (loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
+    instead of gen_rtx_SUBREG.
+
+gcc/testsuite/ChangeLog:
+
+    PR target/112476
+    * gcc.target/loongarch/pr112476-1.c: New test.
+    * gcc.target/loongarch/pr112476-2.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc      | 11 ++++++---
+ .../gcc.target/loongarch/pr112476-1.c  | 24 +++++++++++++++++++
+ .../gcc.target/loongarch/pr112476-2.c  |  5 ++++
+ 3 files changed, 37 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index fa5c14be6..65ca1489f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+       if (mode != vimode)
+         {
+           xop1 = gen_reg_rtx (vimode);
+-          emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
++          emit_move_insn (xop1,
++                          simplify_gen_subreg (vimode, operands[1],
++                                               mode, 0));
+         }
+       emit_move_insn (src1, xop1);
+     }
+@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+       if (mode != vimode)
+         {
+           xop2 = gen_reg_rtx (vimode);
+-          emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
++          emit_move_insn (xop2,
++                          simplify_gen_subreg (vimode, operands[2],
++                                               mode, 0));
+         }
+       emit_move_insn (src2, xop2);
+     }
+@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+                            gen_rtx_AND (vimode, mask, src1));
+       /* The result is placed back to a register with the mask.  */
+       emit_insn (gen_rtx_SET (mask, bsel));
+-      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
++      emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
++                                                        vimode, 0));
+     }
+ }
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+new file mode 100644
+index 000000000..4cf133e7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+@@ -0,0 +1,24 @@
++/* PR target/112476: ICE with -mlsx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */
++
++int foo, bar;
++float baz, res, a;
++
++void
++apply_adjacent_ternary (float *dst, float *src0)
++{
++  do
++    {
++      __builtin_memcpy (&res, &src0, sizeof (res));
++      *dst = foo ? baz : res;
++      dst++;
++    }
++  while (dst != src0);
++}
++
++void
++xx (void)
++{
++  apply_adjacent_ternary (&a, &a);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+new file mode 100644
+index 000000000..cc0dfbfc9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+@@ -0,0 +1,5 @@
++/* PR target/112476: ICE with -mlasx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */
++
++#include "pr112476-1.c"
+--
+2.43.0
+
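For context, a sketch of the rule the patch applies (GCC internals; the helper name narrow_to_mode is hypothetical): gen_rtx_SUBREG wraps its operand unconditionally, so applying it to something that is already a (subreg ...) yields the unsupported (subreg (subreg ...)). simplify_gen_subreg folds the nesting instead, or returns NULL_RTX when no valid subreg exists.

/* Hypothetical helper; assumes GCC's internal RTL API.  */
static rtx
narrow_to_mode (machine_mode outer, rtx op, machine_mode inner)
{
  /* Folds subregs of subregs into a single subreg where possible,
     unlike gen_rtx_SUBREG, which would nest them.  */
  return simplify_gen_subreg (outer, op, inner, 0);
}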
View file
_service:tar_scm:0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
Added
@@ -0,0 +1,116 @@
+From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 20:24:58 +0800
+Subject: [PATCH 025/188] LoongArch: Optimize single-used address with
+ -mexplicit-relocs=auto for fld/fst
+
+fld and fst have the same addressing mode as ld.w and st.w, so the same
+optimization as r14-4851 should be applied to them too.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
+    iterator.
+    (ST_ANY): New mode iterator.
+    (define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
+    ST_ANY instead of QHWD for applicable patterns.
+---
+ gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 14 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 80487488d..ed86c95bd 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -400,6 +400,14 @@
+    (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+    (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
+
++;; A mode for anything with 32 bits or more, and able to be loaded with
++;; the same addressing mode as ld.w.
++(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
++
++;; A mode for anything able to be stored with the same addressing mode as
++;; st.w.
++(define_mode_iterator ST_ANY [QHWD ANYF])
++
+ ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
+ ;; 32-bit version and "mul.d" in the 64-bit version.
+ (define_mode_attr d [(SI "w") (DI "d")])
+@@ -3785,13 +3793,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-        (mem:GPR (match_dup 0)))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++        (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++        (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3799,14 +3808,15 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-        (mem:GPR (plus (match_dup 0)
+-                       (match_operand 3 "const_int_operand"))))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++        (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
++                                      (match_operand 3 "const_int_operand"))))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++        (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+@@ -3850,13 +3860,13 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (match_dup 0))
+-        (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (match_dup 0))
++        (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3864,14 +3874,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+         (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (plus (match_dup 0)
+-                        (match_operand 3 "const_int_operand")))
+-        (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (plus (match_dup 0)
++                          (match_operand 3 "const_int_operand")))
++        (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+--
+2.43.0
+
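A hypothetical illustration (not from the patch) of a single-used pcrel address that the widened peephole now also covers for FP accesses; with -mexplicit-relocs=auto and -mcmodel=normal the expected output is roughly a pcalau12i followed by an fld.s with a %pc_lo12 offset, with no separate address materialization.

static float val;

float
load_val (void)
{
  /* Expected code, roughly:
       pcalau12i  $r4, %pc_hi20(val)
       fld.s      $f0, $r4, %pc_lo12(val)  */
  return val;
}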
View file
_service:tar_scm:0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
Added
@@ -0,0 +1,305 @@
+From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 3 Nov 2023 21:19:59 +0800
+Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler
+ doesn't support conditional branch relaxation [PR112330]
+
+As the commit message of r14-4674 has indicated, if the assembler does
+not support conditional branch relaxation, a relocation overflow may
+happen on conditional branches when relaxation is enabled because the
+number of NOP instructions inserted by the assembler will be more than
+the number estimated by GCC.
+
+To work around this issue, disable relaxation by default if the
+assembler is detected as incapable of performing conditional branch
+relaxation at GCC build time.  We also need to pass -mno-relax to the
+assembler to really disable relaxation.  But, if the assembler does not
+support the -mrelax option at all, we should not pass -mno-relax to the
+assembler or it will immediately error out.  Also handle this with the
+build-time assembler capability probing, and add a pair of options
+-m[no-]pass-mrelax-to-as to allow using a different assembler from the
+build-time one.
+
+With this change, if GCC is built with GAS 2.41, relaxation will be
+disabled by default.  So the default value of -mexplicit-relocs= is also
+changed to 'always' if -mno-relax is specified or implied by the
+build-time default, because using assembler macros for symbol addresses
+produces no benefit when relaxation is disabled.
+
+gcc/ChangeLog:
+
+    PR target/112330
+    * config/loongarch/genopts/loongarch.opt.in: Add
+    -m[no-]pass-relax-to-as.  Change the default of -m[no-]relax to
+    account for conditional branch relaxation support status.
+    * config/loongarch/loongarch.opt: Regenerate.
+    * configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
+    the assembler supports conditional branch relaxation.
+    * configure: Regenerate.
+    * config.in: Regenerate.  Note that there are some unrelated
+    changes introduced by r14-5424 (which does not contain a
+    config.in regeneration).
+    * config/loongarch/loongarch-opts.h
+    (HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
+    * config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
+    Define.
+    (ASM_MRELAX_SPEC): Define.
+    (ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
+    * config/loongarch/loongarch.cc: Take the setting of
+    -mno-relax into account when determining the default of
+    -mexplicit-relocs=.
+    * doc/invoke.texi: Document -m[no-]relax and
+    -m[no-]pass-mrelax-to-as for LoongArch.  Update the default
+    value of -mexplicit-relocs=.
+---
+ gcc/config.in                                 | 35 ++++++++++++++++++-
+ gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
+ gcc/config/loongarch/loongarch-driver.h       | 16 ++++++++-
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++
+ gcc/config/loongarch/loongarch.cc             |  2 +-
+ gcc/config/loongarch/loongarch.opt            |  6 +++-
+ gcc/configure                                 | 35 +++++++++++++++++++
+ gcc/configure.ac                              | 10 ++++++
+ 8 files changed, 109 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 0c55e67e7..04968b53c 100644
+--- a/gcc/config.in
++++ b/gcc/config.in
+@@ -374,6 +374,12 @@
+ #endif
+
+
++/* Define if your assembler supports conditional branch relaxation. */
++#ifndef USED_FOR_TARGET
++#undef HAVE_AS_COND_BRANCH_RELAXATION
++#endif
++
++
+ /* Define if your assembler supports the --debug-prefix-map option.
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_DEBUG_PREFIX_MAP +@@ -798,6 +804,20 @@ + #endif + + ++/* Define to 1 if you have the Mac OS X function ++ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES ++#endif ++ ++ ++/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in ++ the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFPREFERENCESCOPYAPPVALUE ++#endif ++ ++ + /* Define to 1 if you have the `clearerr_unlocked' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_CLEARERR_UNLOCKED +@@ -822,6 +842,13 @@ + #endif + + ++/* Define if the GNU dcgettext() function is already present or preinstalled. ++ */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_DCGETTEXT ++#endif ++ ++ + /* Define to 1 if we found a declaration for 'abort', otherwise define to 0. + */ + #ifndef USED_FOR_TARGET +@@ -1554,6 +1581,12 @@ + #endif + + ++/* Define if the GNU gettext() function is already present or preinstalled. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_GETTEXT ++#endif ++ ++ + /* Define to 1 if you have the `gettimeofday' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_GETTIMEOFDAY +@@ -1585,7 +1618,7 @@ + #endif + + +-/* Define if you have the iconv() function. */ ++/* Define if you have the iconv() function and it works. */ + #ifndef USED_FOR_TARGET + #undef HAVE_ICONV + #endif +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index e7df1964a..bd3cfaf60 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index 59fa3263d..c8dba2cc4 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. 
If not see + "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \ + "%(subtarget_cc1_spec)" + ++#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}" ++#else ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}" ++#endif ++ ++#if HAVE_AS_MRELAX_OPTION ++#define ASM_MRELAX_SPEC \ ++ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#else ++#define ASM_MRELAX_SPEC \ ++ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#endif ++ + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" ++ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index c4975af00..dfbe9dd5c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_MRELAX_OPTION 0 + #endif + ++#ifndef HAVE_AS_COND_BRANCH_RELAXATION ++#define HAVE_AS_COND_BRANCH_RELAXATION 0 ++#endif ++
View file
_service:tar_scm:0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
Added
@@ -0,0 +1,391 @@
+From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 6 Nov 2023 16:06:08 +0800
+Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions
+ before LL-SC loops
+
+This is isomorphic to the LLVM changes [1]-[2].
+
+On LoongArch, the LL and SC instructions have memory barrier semantics:
+
+- LL: <memory-barrier> + <load-exclusive>
+- SC: <store-conditional> + <memory-barrier>
+
+But the compare and swap operation is allowed to fail, and if it fails
+the SC instruction is not executed, thus the guarantee of acquire
+semantics cannot be ensured.  Therefore, an acquire barrier needs to be
+generated when failure_memorder includes an acquire operation.
+
+On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
+acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
+barrier.  So it's always enough for acquire semantics.  OTOH if an
+acquire semantic is not needed, we still need the "dbar 0x700" as the
+load-load barrier like all LL-SC loops.
+
+[1]: https://github.com/llvm/llvm-project/pull/67391
+[2]: https://github.com/llvm/llvm-project/pull/69339
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc
+    (loongarch_memmodel_needs_release_fence): Remove.
+    (loongarch_cas_failure_memorder_needs_acquire): New static
+    function.
+    (loongarch_print_operand): Redefine 'G' for the barrier on CAS
+    failure.
+    * config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+    Remove the redundant barrier before the LL instruction, and
+    emit an acquire barrier on failure if needed by
+    failure_memorder.
+    (atomic_cas_value_cmp_and_7_<mode>): Likewise.
+    (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
+    before the LL instruction.
+    (atomic_cas_value_sub_7_<mode>): Likewise.
+    (atomic_cas_value_and_7_<mode>): Likewise.
+    (atomic_cas_value_xor_7_<mode>): Likewise.
+    (atomic_cas_value_or_7_<mode>): Likewise.
+    (atomic_cas_value_nand_7_<mode>): Likewise.
+    (atomic_cas_value_exchange_7_<mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/cas-acquire.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 30 ++++---
+ gcc/config/loongarch/sync.md                  | 49 +++++------
+ .../gcc.target/loongarch/cas-acquire.c        | 82 +++++++++++++++++++
+ 3 files changed, 119 insertions(+), 42 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 6d580ee75..8467f03cf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
+     }
+ }
+
+-/* Return true if a FENCE should be emitted to before a memory access to
+-   implement the release portion of memory model MODEL.  */
++/* Return true if a FENCE should be emitted after a failed CAS to
++   implement the acquire semantic of failure_memorder.
*/ + + static bool +-loongarch_memmodel_needs_release_fence (enum memmodel model) ++loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { ++ case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_RELAXED: ++ case MEMMODEL_RELEASE: + return false; + ++ /* MEMMODEL_CONSUME is deliberately not handled because it's always ++ replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by ++ MEMMODEL_CONSUME, read the change (re)introducing it carefully and ++ decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ + default: + gcc_unreachable (); + } +@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'd' Print CONST_INT OP in decimal. + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'F' Print the FPU branch condition for comparison OP. +- 'G' Print a DBAR insn if the memory model requires a release. ++ 'G' Print a DBAR insn for CAS failure (with an acquire semantic if ++ needed, otherwise a simple load-load barrier). + 'H' Print address 52-61bit relocation associated with OP. + 'h' Print the high-part relocation associated with OP. + 'i' Print i if the operand is not a register. +@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'G': +- if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ if (loongarch_cas_failure_memorder_needs_acquire ( ++ memmodel_from_int (INTVAL (op)))) ++ fputs ("dbar\t0b10100", file); ++ else ++ fputs ("dbar\t0x700", file); + break; + + case 'h': +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index efa40f24c..dd1f98946 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -162,19 +162,18 @@ + (clobber (match_scratch:GPR 6 "=&r")) + "" + { +- return "%G5\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "bne\\t%0,%z2,2f\\n\\t" + "or%i3\\t%6,$zero,%3\\n\\t" + "sc.<amo>\\t%6,%1\\n\\t" +- "beq\\t$zero,%6,1b\\n\\t" ++ "beqz\\t%6,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G5\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -267,8 +266,7 @@ + (clobber (match_scratch:GPR 7 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%2\\n\\t" + "bne\\t%7,%z4,2f\\n\\t" +@@ -278,10 +276,10 @@ + "beq\\t$zero,%7,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G6\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 40))) ++ (set (attr "length") (const_int 36))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -336,8 +334,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "add.w\\t%8,%0,%z5\\n\\t" +@@ -347,7 +344,7 @@ + "beq\\t$zero,%7,1b"; + } + +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_sub_7_<mode>" + (set 
(match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -363,8 +360,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + {
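A usage sketch (not one of the new tests): a compare-and-swap whose failure ordering includes acquire. After this patch the barrier moves to the failure path, emitted as "dbar 0b10100", an acquire barrier on LoongArch v1.10 and a full barrier on v1.00, instead of a barrier before the LL.

#include <stdatomic.h>

int
try_lock (atomic_int *lock)
{
  int expected = 0;
  return atomic_compare_exchange_strong_explicit (
      lock, &expected, 1, memory_order_acquire, memory_order_acquire);
}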
View file
_service:tar_scm:0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
Added
@@ -0,0 +1,161 @@
+From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Thu, 16 Nov 2023 20:31:09 +0800
+Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test
+ case.
+
+These tests failed when they were first added; this patch adjusts the
+scan-assembler-times counts to fix them.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust assembler times.
+    * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
+    * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
+    * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.
+---
+ .../loongarch/vector/lasx/lasx-vcond-1.c      | 12 +++----
+ .../loongarch/vector/lasx/lasx-vcond-2.c      | 36 +++++++++----------
+ .../loongarch/vector/lsx/lsx-vcond-1.c        | 12 +++----
+ .../loongarch/vector/lsx/lsx-vcond-2.c        | 36 +++++++++----------
+ 4 files changed, 48 insertions(+), 48 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+index ee9cb1a1f..57064eac9 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
+
+ /* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+index 5f40ed44c..55d5a084c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+@@ -67,21 +67,21 @@ TEST_CMP (nule)
+ TEST_CMP (nuge)
+ TEST_CMP (nugt)
+
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
+-/* { dg-final {
scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +index 138adccfa..8c69f0d9b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR) + + /* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index e8fe31f8f..2214afd0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -67,21 +67,21 @@ TEST_CMP (nule) + TEST_CMP (nuge) + TEST_CMP (nugt) + +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } 
*/ +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */ +-- +2.43.0 +
View file
_service:tar_scm:0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
Added
@@ -0,0 +1,45 @@ +From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 16 Nov 2023 16:44:36 +0800 +Subject: PATCH 029/188 LoongArch: Increase cost of vector aligned + store/load. + +Based on SPEC2017 performance evaluation results, it's better to make them equal +to the cost of unaligned store/load so as to avoid odd alignment peeling. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_builtin_vectorization_cost): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8467f03cf..b6f0d61ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + case scalar_stmt: + case scalar_load: + case vector_stmt: +- case vector_load: + case vec_to_scalar: + case scalar_to_vec: + case scalar_store: +- case vector_store: + return 1; + + case vec_promote_demote: +@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + return LASX_SUPPORTED_MODE_P (mode) + && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1; + ++ case vector_load: ++ case vector_store: + case unaligned_load: + case unaligned_store: + return 2; +-- +2.43.0 +
View file
_service:tar_scm:0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
Added
@@ -0,0 +1,58 @@
+From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Fri, 17 Nov 2023 10:38:02 +0800
+Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
+
+LoongArch has defined ctz and clz in the backend, but if we want GCC to
+do the CTZ transformation optimization in the forwprop2 pass, GCC needs
+to know the value of clz/ctz at zero, which may be beneficial for some
+test cases (like spec2017 deepsjeng_r).
+
+After implementing the macro, we tested the dynamic instruction count on
+deepsjeng_r:
+- before 1688423249186
+- after  1660311215745 (1.66% reduction)
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO):
+    Implement.
+    (CTZ_DEFINED_VALUE_AT_ZERO): Same.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.dg/pr90838.c: Add clz/ctz test support on LoongArch.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ gcc/testsuite/gcc.dg/pr90838.c   | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 6e8ac293a..19cf6fd33 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function
+
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
++
++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
+index 7502b8463..7aa912525 100644
+--- a/gcc/testsuite/gcc.dg/pr90838.c
++++ b/gcc/testsuite/gcc.dg/pr90838.c
+@@ -82,3 +82,8 @@ int ctz4 (unsigned long x)
+ /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
++
++/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */
+--
+2.43.0
+
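The transformation this macro unlocks targets table-based ctz idioms like the classic de Bruijn sequence below (a sketch in the spirit of gcc.dg/pr90838.c, not the exact test). With C[LT]Z_DEFINED_VALUE_AT_ZERO in place, forwprop2 may rewrite the whole expression into a single .CTZ, which then becomes a ctz.w instruction.

static const int debruijn32[32] = {
  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};

int
ctz32 (unsigned x)
{
  /* Isolate the lowest set bit, multiply by a de Bruijn constant, and
     use the top five bits as a table index.  */
  return debruijn32[((x & -x) * 0x077CB531u) >> 27];
}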
View file
_service:tar_scm:0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
Added
@@ -0,0 +1,197 @@
+From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 14 Nov 2023 00:17:19 +0800
+Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1)
+ expansion efficiently
+
+With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be
+vectorized using [x]vbitseti.{w/d} instructions to directly set the
+sign bits.
+
+Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion
+efficiently" (r14-5289).
+
+gcc/ChangeLog:
+
+    * config/loongarch/lsx.md (copysign<mode>3): Allow operand 2 to
+    be a reg_or_vector_same_val_operand.  If it's a const vector
+    with same negative elements, expand the copysign with a bitset
+    instruction.  Otherwise, force it into a register.
+    * config/loongarch/lasx.md (copysign<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * g++.target/loongarch/vect-copysign-negconst.C: New test.
+    * g++.target/loongarch/vect-copysign-negconst-run.C: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 22 ++++++++-
+ gcc/config/loongarch/lsx.md                   | 22 ++++++++-
+ .../loongarch/vect-copysign-negconst-run.C    | 47 +++++++++++++++++++
+ .../loongarch/vect-copysign-negconst.C        | 27 +++++++++++
+ 4 files changed, 116 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index f0f2dd08d..2e11f0612 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3136,11 +3136,31 @@
+                (match_operand:FLASX 1 "register_operand")))
+    (set (match_dup 5)
+         (and:FLASX (match_dup 3)
+-                   (match_operand:FLASX 2 "register_operand")))
++                   (match_operand:FLASX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLASX 0 "register_operand")
+         (ior:FLASX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LASX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++          && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++        {
++          rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++          operands[0] = lowpart_subreg (<VIMODE256>mode, operands[0],
++                                        <MODE>mode);
++          operands[1] = lowpart_subreg (<VIMODE256>mode, operands[1],
++                                        <MODE>mode);
++          emit_insn (gen_lasx_xvbitseti_<lasxfmt> (operands[0],
++                                                   operands[1], n));
++          DONE;
++        }
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 55c7d79a0..8ea41c85b 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -2873,11 +2873,31 @@
+                (match_operand:FLSX 1 "register_operand")))
+    (set (match_dup 5)
+         (and:FLSX (match_dup 3)
+-                  (match_operand:FLSX 2 "register_operand")))
++                  (match_operand:FLSX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLSX 0 "register_operand")
+         (ior:FLSX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LSX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++          && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++        {
++          rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++          operands[0] = lowpart_subreg (<VIMODE>mode, operands[0],
++                                        <MODE>mode);
++          operands[1] = lowpart_subreg (<VIMODE>mode, operands[1],
++                                        <MODE>mode);
++          emit_insn (gen_lsx_vbitseti_<lsxfmt> (operands[0], operands[1],
++                                                n));
++          DONE;
++        }
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+new file mode 100644
+index 000000000..d2d5d15c9
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+@@ -0,0 +1,47 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-require-effective-target loongarch_asx_hw } */
++
++#include "vect-copysign-negconst.C"
++
++double d[] = {1.2, -3.4, -5.6, 7.8};
++float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810};
++
++double _abs(double x) { return __builtin_fabs (x); }
++float _abs(float x) { return __builtin_fabsf (x); }
++
++template <class T>
++void
++check (T *arr, T *orig, int len)
++{
++  for (int i = 0; i < len; i++)
++    {
++      if (arr[i] > 0)
++        __builtin_trap ();
++      if (_abs (arr[i]) != _abs (orig[i]))
++        __builtin_trap ();
++    }
++}
++
++int
++main()
++{
++  double test_d[4];
++  float test_f[8];
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<2> (test_d);
++  check (test_d, d, 2);
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<4> (test_d);
++  check (test_d, d, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<4> (test_f);
++  check (test_f, f, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<8> (test_f);
++  check (test_f, f, 8);
++}
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+new file mode 100644
+index 000000000..5e8820d2b
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-final { scan-assembler "\txvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\txvbitseti.*31" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*31" } } */
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (float *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysignf (arr[i], -2);
++}
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (double *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysign (arr[i], -3);
++}
++
++template void force_negative<4>(float *);
++template void force_negative<8>(float *);
++template void force_negative<2>(double *);
++template void force_negative<4>(double *);
+--
+2.43.0
+
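A minimal sketch (not one of the new tests): every element only needs its sign bit forced on, so under -mlasx a loop like this can now become a single xvbitseti.w on bit 31 instead of a three-instruction mask-and-merge sequence.

void
all_negative (float *__restrict a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = __builtin_copysignf (a[i], -1.0f);
}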
View file
_service:tar_scm:0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
Added
@@ -0,0 +1,561 @@ +From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 16 Nov 2023 15:06:11 +0800 +Subject: PATCH 032/188 LoongArch: Add code generation support for call36 + function calls. + +When compiling with '-mcmodel=medium', the function call is made through +'pcaddu18i+jirl' if binutils supports call36, otherwise the +native implementation 'pcalau12i+jirl' is used. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro. + * config/loongarch/loongarch.cc (loongarch_legitimize_call_address): + If binutils supports call36, the function call is not split over expand. + * config/loongarch/loongarch.md: Add call36 generation code. + * config/loongarch/predicates.md: Likewise. + * configure: Regenerate. + * configure.ac: Check whether binutils supports call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-medium-5.c: If the assembler supports call36, + the test is abandoned. + * gcc.target/loongarch/func-call-medium-6.c: Likewise. + * gcc.target/loongarch/func-call-medium-7.c: Likewise. + * gcc.target/loongarch/func-call-medium-8.c: Likewise. + * lib/target-supports.exp: Added a function to see if the assembler supports + the call36 relocation. + * gcc.target/loongarch/func-call-medium-call36-1.c: New test. + * gcc.target/loongarch/func-call-medium-call36.c: New test. + +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.in | 6 + + gcc/config/loongarch/loongarch-opts.h | 4 + + gcc/config/loongarch/loongarch.cc | 12 +- + gcc/config/loongarch/loongarch.md | 171 +++++++++++++++--- + gcc/config/loongarch/predicates.md | 7 +- + gcc/configure | 32 ++++ + gcc/configure.ac | 6 + + .../gcc.target/loongarch/func-call-medium-5.c | 1 + + .../gcc.target/loongarch/func-call-medium-6.c | 1 + + .../gcc.target/loongarch/func-call-medium-7.c | 1 + + .../gcc.target/loongarch/func-call-medium-8.c | 1 + + .../loongarch/func-call-medium-call36-1.c | 21 +++ + .../loongarch/func-call-medium-call36.c | 32 ++++ + gcc/testsuite/lib/target-supports.exp | 9 + + 14 files changed, 268 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c + +diff --git a/gcc/config.in b/gcc/config.in +index 04968b53c..033cfb98b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -759,6 +759,12 @@ + #endif + + ++/* Define if your assembler supports call36 relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_SUPPORT_CALL36 ++#endif ++ ++ + /* Define if your assembler and linker support thread-local storage. 
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_TLS +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index dfbe9dd5c..22ce1a122 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_SUPPORT_CALL36 ++#define HAVE_AS_SUPPORT_CALL36 0 ++#endif ++ + #ifndef HAVE_AS_MRELAX_OPTION + #define HAVE_AS_MRELAX_OPTION 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b6f0d61ef..43f0e82ba 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr) + + enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr); + +- /* Split function call insn 'bl sym' or 'bl %plt(sym)' to : +- pcalau12i $rd, %pc_hi20(sym) +- jr $rd, %pc_lo12(sym). */ ++ /* If add the compilation option '-cmodel=medium', and the assembler does ++ not support call36. The following sequence of instructions will be ++ used for the function call: ++ pcalau12i $rd, %pc_hi20(sym) ++ jr $rd, %pc_lo12(sym) ++ */ + + if (TARGET_CMODEL_MEDIUM +- && TARGET_EXPLICIT_RELOCS ++ && !HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) + && (symbol_type == SYMBOL_PCREL + || (symbol_type == SYMBOL_GOT_DISP && flag_plt))) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed86c95bd..52e40a208 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3274,7 +3274,13 @@ + XEXP (target, 1), + operands1)); + else +- emit_call_insn (gen_sibcall_internal (target, operands1)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_internal (target, operands1)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + DONE; + }) + +@@ -3282,10 +3288,25 @@ + (call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b")) + (match_operand 1 "" "")) + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%0 +- b\t%0 +- b\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@sibcall_internal_1<mode>" +@@ -3318,9 +3339,17 @@ + operands2, + arg2)); + else +- emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, +- operands2, +- arg2)); ++ { ++ rtx call ++ = emit_call_insn (gen_sibcall_value_multiple_internal (arg1, ++ target, ++ operands2, ++ arg2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + } + else + { +@@ -3334,8 +3363,15 @@ + XEXP (target, 1), + operands2)); + else +- emit_call_insn (gen_sibcall_value_internal (operands0, target, +- operands2)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_value_internal (operands0, ++ target, ++ operands2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG 
(Pmode, T0_REGNUM)); ++ } + } + DONE; + }) +@@ -3345,10 +3381,25 @@
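As a hypothetical illustration (not from the patch), an ordinary external call compiled with -mcmodel=medium and a call36-capable assembler is expected to emit roughly the two-instruction sequence below, reaching about +/-128 GiB.

extern void callee (void);

void
caller (void)
{
  /* Expected code, roughly:
       pcaddu18i  $r12, %call36(callee)
       jirl       $r1, $r12, 0  */
  callee ();
}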
View file
_service:tar_scm:0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
Added
@@ -0,0 +1,362 @@ +From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 17 Nov 2023 15:42:53 +0800 +Subject: PATCH 033/188 LoongArch: Implement atomic operations using + LoongArch1.1 instructions. + +1. short and char type calls for atomic_add_fetch and __atomic_fetch_add are + implemented using amadd{_db}.{b/h}. +2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange. +3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are + implemented using amswap{_db}.{b/h}. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Add comments. + * config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro. + * config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence): + Remove redundant code implementations. + * config/loongarch/sync.md (d): Added QI, HI support. + (atomic_add<mode>): New template. + (atomic_exchange<mode>_short): Likewise. + (atomic_cas_value_strong<mode>_amcas): Likewise.. + (atomic_fetch_add<mode>_short): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 2 + + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch.cc | 6 +- + gcc/config/loongarch/sync.md | 186 ++++++++++++++++++++------ + 4 files changed, 147 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 4757de14b..078d8607d 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -54,7 +54,9 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings; ++/* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 ++/* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 22ce1a122..9b3d023ac 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + || la_target.isa.simd == ISA_EXT_SIMD_LASX) + #define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -mno-memcpy imposes a global constraint. 
*/ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 43f0e82ba..7bb46a45d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + static bool + loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + return true; + + case MEMMODEL_RELAXED: +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index dd1f98946..1eabaec04 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -38,7 +38,7 @@ + (plus "add") (ior "or") (xor "xor") (and "and")) + + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo (SI "w") (DI "d")) ++(define_mode_attr amo (QI "b") (HI "h") (SI "w") (DI "d")) + + ;; <amop> expands to the name of the atomic operand that implements a + ;; particular code. +@@ -123,7 +123,18 @@ + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_add<mode>" ++ (set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ (plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand") ;; model ++ UNSPEC_SYNC_OLD_OP)) ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A2.<amo>\t$zero,%z1,%0" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_fetch_<atomic_optab><mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -131,12 +142,12 @@ + (set (match_dup 1) + (unspec_volatile:GPR + (any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_exchange<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -148,7 +159,19 @@ + (match_operand:GPR 2 "register_operand" "r")) + "" + "amswap%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_exchange<mode>_short" ++ (set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ (match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand") ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r")) ++ "ISA_BASE_IS_LA64V110" ++ "amswap%A3.<amo>\t%0,%z2,%1" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_cas_value_strong<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -156,25 +179,36 @@ + (set (match_dup 1) + (unspec_volatile:GPR (match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 "const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand") ;; mod_f ++ (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r")) ++ (clobber (match_scratch:GPR 5 "=&r")) + "" + { + return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" 
+ "bne\\t%0,%z2,2f\\n\\t" +- "or%i3\\t%6,$zero,%3\\n\\t" +- "sc.<amo>\\t%6,%1\\n\\t" +- "beqz\\t%6,1b\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.<amo>\\t%5,%1\\n\\t" ++ "beqz\\t%5,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "%G5\\n\\t" ++ "%G4\\n\\t" + "3:\\n\\t"; + } + (set (attr "length") (const_int 28))) + ++(define_insn "atomic_cas_value_strong<mode>_amcas" ++ (set (match_operand:QHWD 0 "register_operand" "=&r") ++ (match_operand:QHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:QHWD (match_operand:QHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:QHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP)) ++ "ISA_BASE_IS_LA64V110" ++ "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" ++ (set (attr "length") (const_int 8))) ++ + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -186,9 +220,29 @@ + (match_operand:SI 7 "const_int_operand" "") ;; mod_f + "" + {
_service:tar_scm:0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
Added
@@ -0,0 +1,140 @@
+From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 17 Nov 2023 16:04:45 +0800
+Subject: [PATCH 034/188] LoongArch: atomic_load and atomic_store are
+ implemented using dbar grading.
+
+Because the LA464 memory model allows loads from the same address to
+complete out of order, in the test example below the load at line 23
+may be executed before the load at line 21, resulting in an error.
+So when memmodel is MEMMODEL_RELAXED, the load instruction is followed
+by "dbar 0x700" when implementing __atomic_load.
+
+  1 void *
+  2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
+  3 {
+  4   int *intptr;
+  5   uintptr_t oldval = 1;
+  6
+  7   __atomic_compare_exchange_n (ptrlock, &oldval, 2, false,
+  8                                MEMMODEL_RELAXED, MEMMODEL_RELAXED);
+  9
+ 10   /* futex works on ints, not pointers.
+ 11      But a valid work share pointer will be at least
+ 12      8 byte aligned, so it is safe to assume the low
+ 13      32-bits of the pointer won't contain values 1 or 2.  */
+ 14   __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
+ 15 #if __BYTE_ORDER == __BIG_ENDIAN
+ 16   if (sizeof (*ptrlock) > sizeof (int))
+ 17     intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
+ 18 #endif
+ 19   do
+ 20     do_wait (intptr, 2);
+ 21   while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2);
+ 22   __asm volatile ("" : : : "memory");
+ 23   return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+ 24 }
+
+gcc/ChangeLog:
+
+	* config/loongarch/sync.md (atomic_load<mode>): New template.
+---
+ gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++---
+ 1 file changed, 65 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 1eabaec04..f4673c856 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -30,6 +30,7 @@
+   UNSPEC_SYNC_OLD_OP
+   UNSPEC_SYNC_EXCHANGE
+   UNSPEC_ATOMIC_STORE
++  UNSPEC_ATOMIC_LOAD
+   UNSPEC_MEMORY_BARRIER
+ )
+ 
+@@ -103,16 +104,75 @@
+ 
+ ;; Atomic memory operations.
+ 
++(define_insn "atomic_load<mode>"
++  [(set (match_operand:QHWD 0 "register_operand" "=r")
++	(unspec_volatile:QHWD
++	  [(match_operand:QHWD 1 "memory_operand" "+m")
++	   (match_operand:SI 2 "const_int_operand")]  ;; model
++	  UNSPEC_ATOMIC_LOAD))]
++  ""
++{
++  enum memmodel model = memmodel_base (INTVAL (operands[2]));
++
++  switch (model)
++    {
++    case MEMMODEL_SEQ_CST:
++      return "dbar\t0x11\\n\\t"
++	     "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x14\\n\\t";
++    case MEMMODEL_ACQUIRE:
++      return "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x14\\n\\t";
++    case MEMMODEL_RELAXED:
++      return "ld.<size>\t%0,%1\\n\\t"
++	     "dbar\t0x700\\n\\t";
++
++    default:
++      /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
++	 __ATOMIC_CONSUME and __ATOMIC_ACQUIRE.
++	 The expand_builtin_atomic_store function converts all invalid memmodels
++	 to MEMMODEL_SEQ_CST.
++
++	 __atomic builtins doc: "Consume is implemented using the
++	 stronger acquire memory order because of a deficiency in C++11's
++	 semantics."  See PR 59448 and get_memmodel in builtins.cc.  */
++      gcc_unreachable ();
++    }
++}
++  [(set (attr "length") (const_int 12))])
++
+ ;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
+ (define_insn "atomic_store<mode>" +- (set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- (match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ (set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ (match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_ATOMIC_STORE)) + "" +- "amswap%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands2)); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t" ++ "dbar\t0x18\\n\\t"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "st.<size>\t%z1,%0"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ and __ATOMIC_RELEASE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. */ ++ gcc_unreachable (); ++ } ++} ++ (set (attr "length") (const_int 12))) + + (define_insn "atomic_<atomic_optab><mode>" + (set (match_operand:GPR 0 "memory_operand" "+ZB") +-- +2.43.0 +
_service:tar_scm:0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
Added
@@ -0,0 +1,615 @@ +From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:08:03 +0000 +Subject: PATCH 035/188 LoongArch: genopts: Add infrastructure to generate + code for new features in ISA evolution + +LoongArch v1.10 introduced the concept of ISA evolution. During ISA +evolution, many independent features can be added and enumerated via +CPUCFG. + +Add a data file into genopts storing the CPUCFG word, bit, the name +of the command line option controlling if this feature should be used +for compilation, and the text description. Make genstr.sh process these +info and add the command line options into loongarch.opt and +loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for +mapping CPUCFG output to the corresponding option. When handling +-march=native, use the information in loongarch-cpucfg-map.h to generate +the corresponding option mask. Enable the features implied by -march +setting unless the user has explicitly disabled the feature. + +The added options (-mdiv32 and -mld-seq-sa) are not really handled yet. +They'll be used in the following patches. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: New data file. + * config/loongarch/genopts/genstr.sh: Translate info in + isa-evolution.in when generating loongarch-str.h, loongarch.opt, + and loongarch-cpucfg-map.h. + * config/loongarch/genopts/loongarch.opt.in (isa_evolution): + New variable. + * config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New + rule. + (loongarch-str.h): Depend on isa-evolution.in. + (loongarch.opt): Depend on isa-evolution.in. + (loongarch-cpu.o): Depend on loongarch-cpucfg-map.h. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch-def.h (loongarch_isa): Add field + for evolution features. Add helper function to enable features + in this field. + Probe native CPU capability and save the corresponding options + into preset. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): + Probe native CPU capability and save the corresponding options + into preset. + (cache_cpucfg): Simplify with C++11-style for loop. + (cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ... + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Enable the ISA evolution + feature options implied by -march and not explicitly disabled. + (loongarch_asm_code_end): New function, print ISA information as + comments in the assembly if -fverbose-asm. It makes easier to + debug things like -march=native. + (TARGET_ASM_CODE_END): Define. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Generate. + (cpucfg_useful_idx, N_CPUCFG_WORDS) ... here. 
+--- + gcc/config/loongarch/genopts/genstr.sh | 92 ++++++++++++++++++- + gcc/config/loongarch/genopts/isa-evolution.in | 2 + + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++ + gcc/config/loongarch/loongarch-cpu.cc | 46 +++++----- + gcc/config/loongarch/loongarch-cpucfg-map.h | 48 ++++++++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-str.h | 6 +- + gcc/config/loongarch/loongarch.cc | 31 +++++++ + gcc/config/loongarch/loongarch.opt | 20 +++- + gcc/config/loongarch/t-loongarch | 21 ++++- + 10 files changed, 244 insertions(+), 36 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in + create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 972ef125f..bcc616e98 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -25,8 +25,8 @@ cd "$(dirname "$0")" + # Generate a header containing definitions from the string table. + gen_defines() { + cat <<EOF +-/* Generated automatically by "genstr" from "loongarch-strings". +- Please do not edit this file directly. ++/* Generated automatically by "genstr" from "loongarch-strings" and ++ "isa-evolution.in". Please do not edit this file directly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. + Contributed by Loongson Ltd. +@@ -56,6 +56,15 @@ EOF + loongarch-strings + + echo ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ a=$3 ++ gsub(/-/, "_", a) ++ print("#define OPTSTR_"toupper(a)"\t\""$3"\"") ++ }' isa-evolution.in ++ ++ echo + echo "#endif /* LOONGARCH_STR_H */" + } + +@@ -77,11 +86,12 @@ gen_options() { + # print a header + cat << EOF + ; Generated by "genstr" from the template "loongarch.opt.in" +-; and definitions from "loongarch-strings". ++; and definitions from "loongarch-strings" and "isa-evolution.in". + ; + ; Please do not edit this file directly. + ; It will be automatically updated during a gcc build +-; if you change "loongarch.opt.in" or "loongarch-strings". ++; if you change "loongarch.opt.in", "loongarch-strings", or ++; "isa-evolution.in". + ; + EOF + +@@ -91,13 +101,85 @@ EOF + eval "echo \"$line\"" + done + } ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ print("") ++ print("m"$3) ++ gsub(/-/, "_", $3) ++ print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ $1=""; $2=""; $3="" ++ sub(/^ */, "", $0) ++ print($0) ++ }' isa-evolution.in ++} ++ ++gen_cpucfg_map() { ++ cat <<EOF ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map = { ++EOF ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ gsub(/-/, "_", $3) ++ print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") ++ }' isa-evolution.in ++ ++ echo "};" ++ echo ++ echo "static constexpr int cpucfg_useful_idx = {" ++ ++ awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } ++ {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ ++ isa-evolution.in | sort -n | uniq ++ ++ echo "};" ++ echo "" ++ ++ awk 'BEGIN { max=19 } ++ { if ($1+0 > max+0) max=$1 } ++ END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ ++ isa-evolution.in ++ ++ echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" + } + + main() { + case "$1" in ++ cpucfg-map) gen_cpucfg_map;; + header) gen_defines;;
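To see the generator's output shape, here is a sketch of a loongarch-cpucfg-map.h entry produced for an isa-evolution.in record such as "2 26 div32 ..." (the entry matches the awk template above and the regenerated file shown in a later patch; treat the exact contents as illustrative):

/* Sketch of generated loongarch-cpucfg-map.h content.  */
static constexpr struct {
  int cpucfg_word;
  unsigned int cpucfg_bit;
  HOST_WIDE_INT isa_evolution_bit;
} cpucfg_map[] = {
  { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
};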
_service:tar_scm:0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
Added
@@ -0,0 +1,148 @@
+From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 18 Nov 2023 03:19:07 +0800
+Subject: [PATCH 036/188] LoongArch: Add evolution features of base ISA
+ revisions
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h
+	(loongarch_isa_base_features): Declare.  Define it in ...
+	* config/loongarch/loongarch-cpu.cc
+	(loongarch_isa_base_features): ... here.
+	(fill_native_cpu_config): If we know the base ISA of the CPU
+	model from PRID, use it instead of la64 (v1.0).  Check if all
+	expected features of this base ISA are available, emit a warning
+	if not.
+	* config/loongarch/loongarch-opts.cc (config_target_isa): Enable
+	the features implied by the base ISA if not -march=native.
+---
+ gcc/config/loongarch/loongarch-cpu.cc  | 62 ++++++++++++++++++--------
+ gcc/config/loongarch/loongarch-def.h   |  5 +++
+ gcc/config/loongarch/loongarch-opts.cc |  3 ++
+ 3 files changed, 52 insertions(+), 18 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
+index e1cd85d02..76d66fa55 100644
+--- a/gcc/config/loongarch/loongarch-cpu.cc
++++ b/gcc/config/loongarch/loongarch-cpu.cc
+@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3.  If not see
+ #include "loongarch-cpucfg-map.h"
+ #include "loongarch-str.h"
+ 
++/* loongarch_isa_base_features defined here instead of loongarch-def.c
++   because we need to use options.h.  Pay attention on the order of elements
++   in the initializer becaue ISO C++ does not allow C99 designated
++   initializers!  */
++
++#define ISA_BASE_LA64V110_FEATURES \
++  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
++
++int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
++  /* ISA_BASE_LA64V100 = */ 0,
++  /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES,
++};
++
+ /* Native CPU detection with "cpucfg" */
+ static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
+ 
+@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ 	 With: base architecture (ARCH)
+ 	 At:   cpucfg_words[1][11:0] */
+ 
+-      switch (cpucfg_cache[1] & 0x3)
+-	{
+-	  case 0x02:
+-	    tmp = ISA_BASE_LA64V100;
+-	    break;
+-
+-	  default:
+-	    fatal_error (UNKNOWN_LOCATION,
+-			 "unknown native base architecture %<0x%x%>, "
+-			 "%qs failed", (unsigned int) (cpucfg_cache[1] & 0x3),
+-			 "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
+-	}
+-
+-      /* Check consistency with PRID presets.  */
+-      if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
+-	warning (0, "base architecture %qs differs from PRID preset %qs",
+-		 loongarch_isa_base_strings[tmp],
+-		 loongarch_isa_base_strings[preset.base]);
++      if (native_cpu_type != CPU_NATIVE)
++	tmp = loongarch_cpu_default_isa[native_cpu_type].base;
++      else
++	switch (cpucfg_cache[1] & 0x3)
++	  {
++	  case 0x02:
++	    tmp = ISA_BASE_LA64V100;
++	    break;
++
++	  default:
++	    fatal_error (UNKNOWN_LOCATION,
++			 "unknown native base architecture %<0x%x%>, "
++			 "%qs failed",
++			 (unsigned int) (cpucfg_cache[1] & 0x3),
++			 "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
++	  }
+ 
+       /* Use the native value anyways.  */
+       preset.base = tmp;
+@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+       for (const auto &entry: cpucfg_map)
+ 	if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
+ 	  preset.evolution |= entry.isa_evolution_bit;
++
++      if (native_cpu_type != CPU_NATIVE)
++	{
++	  /* Check if the local CPU really supports the features of the base
++	     ISA of probed native_cpu_type.  If any feature is not detected,
++	     either GCC or the hardware is buggy.  */
++	  auto base_isa_feature = loongarch_isa_base_features[preset.base];
++	  if ((preset.evolution & base_isa_feature) != base_isa_feature)
++	    warning (0,
++		     "detected base architecture %qs, but some of its "
++		     "features are not detected; the detected base "
++		     "architecture may be unreliable, only detected "
++		     "features will be enabled",
++		     loongarch_isa_base_strings[preset.base]);
++	}
+     }
+ 
+   if (tune_native_p)
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index cb99caebe..ca0a324dd 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -55,12 +55,17 @@ extern "C" {
+ 
+ /* enum isa_base */
+ extern const char* loongarch_isa_base_strings[];
++
+ /* LoongArch V1.00.  */
+ #define ISA_BASE_LA64V100 0
+ /* LoongArch V1.10.  */
+ #define ISA_BASE_LA64V110 1
+ #define N_ISA_BASE_TYPES 2
+ 
++/* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
++   we cannot use the C++ header options.h in loongarch-def.c.  */
++extern int64_t loongarch_isa_base_features[];
++
+ /* enum isa_ext_* */
+ extern const char* loongarch_isa_ext_strings[];
+ #define ISA_EXT_NONE 0
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index f10a9d3ff..390720479 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -284,6 +284,9 @@ config_target_isa:
+   /* Get default ISA from "-march" or its default value.  */
+   t.isa = loongarch_cpu_default_isa[t.cpu_arch];
+ 
++  if (t.cpu_arch != CPU_NATIVE)
++    t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
++
+   /* Apply incremental changes.  */
+   /* "-march=native" overrides the default FPU type.  */
+ 
+-- 
+2.43.0
+
_service:tar_scm:0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
Added
@@ -0,0 +1,156 @@ +From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 16 Nov 2023 09:21:47 +0800 +Subject: PATCH 037/188 LoongArch: Take the advantage of -mdiv32 if it's + enabled + +With -mdiv32, we can assume div.wu and mod.wu works on low 32 bits +of a 64-bit GPR even if it's not sign-extended. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (DIV): New mode iterator. + (<optab:ANY_DIV><mode:GPR>3): Don't expand if TARGET_DIV32. + (<optab:ANY_DIV>di3_fake): Disable if TARGET_DIV32. + (*<optab:ANY_DIV><mode:GPR>3): Allow SImode if TARGET_DIV32. + (<optab:ANY_DIV>si3_extended): New insn if TARGET_DIV32. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/div-div32.c: New test. + * gcc.target/loongarch/div-no-div32.c: New test. +--- + gcc/config/loongarch/loongarch.md | 31 ++++++++++++++++--- + .../gcc.target/loongarch/div-div32.c | 31 +++++++++++++++++++ + .../gcc.target/loongarch/div-no-div32.c | 11 +++++++ + 3 files changed, 68 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 52e40a208..c4e7af107 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -408,6 +408,10 @@ + ;; st.w. + (define_mode_iterator ST_ANY QHWD ANYF) + ++;; A mode for anything legal as a input of a div or mod instruction. ++(define_mode_iterator DIV (DI "TARGET_64BIT") ++ (SI "!TARGET_64BIT || TARGET_DIV32")) ++ + ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d (SI "w") (DI "d")) +@@ -914,7 +918,7 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT && !TARGET_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -934,9 +938,9 @@ + }) + + (define_insn "*<optab><mode>3" +- (set (match_operand:X 0 "register_operand" "=r,&r,&r") +- (any_div:X (match_operand:X 1 "register_operand" "r,r,0") +- (match_operand:X 2 "register_operand" "r,r,r"))) ++ (set (match_operand:DIV 0 "register_operand" "=r,&r,&r") ++ (any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0") ++ (match_operand:DIV 2 "register_operand" "r,r,r"))) + "" + { + return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands); +@@ -949,6 +953,23 @@ + (const_string "yes") + (const_string "no")))) + ++(define_insn "<optab>si3_extended" ++ (set (match_operand:DI 0 "register_operand" "=r,&r,&r") ++ (sign_extend ++ (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") ++ (match_operand:SI 2 "register_operand" "r,r,r")))) ++ "TARGET_64BIT && TARGET_DIV32" ++{ ++ return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); ++} ++ (set_attr "type" "idiv") ++ (set_attr "mode" "SI") ++ (set (attr "enabled") ++ (if_then_else ++ (match_test "!!which_alternative == loongarch_check_zero_div_p()") ++ (const_string "yes") ++ (const_string "no")))) ++ + (define_insn "<optab>di3_fake" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r") + (sign_extend:DI +@@ -957,7 +978,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0) + UNSPEC_FAKE_ANY_DIV))) +- "TARGET_64BIT" ++ "TARGET_64BIT && !TARGET_DIV32" + { + return 
loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c +new file mode 100644 +index 000000000..8b1f686ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */ ++ ++int ++divw (long a, long b) ++{ ++ return (int)a / (int)b; ++} ++ ++unsigned int ++divwu (long a, long b) ++{ ++ return (unsigned int)a / (unsigned int)b; ++} ++ ++int ++modw (long a, long b) ++{ ++ return (int)a % (int)b; ++} ++ ++unsigned int ++modwu (long a, long b) ++{ ++ return (unsigned int)a % (unsigned int)b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +new file mode 100644 +index 000000000..f0f697ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++ ++/* -mno-div32 should be implied by -march=loongarch64. */ ++/* { dg-final { scan-assembler-times "slli\.w\^\n\*0" 8 } } */ ++ ++#include "div-div32.c" +-- +2.43.0 +
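The payoff, sketched at the source level (this mirrors the new div-div32.c test above; the commented instructions describe the intent, not a guaranteed sequence):

/* With -O2 -march=loongarch64 -mabi=lp64d -mdiv32 the division below
   becomes a single div.w on the low 32 bits of the GPRs; without
   -mdiv32, GCC must first sign-extend both inputs (the slli.w ...,0
   sequence the tests scan for) and go through <optab>di3_fake.  */
int
div32_example (long a, long b)
{
  return (int) a / (int) b;
}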
_service:tar_scm:0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
Added
@@ -0,0 +1,61 @@
+From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 16 Nov 2023 09:30:14 +0800
+Subject: [PATCH 038/188] LoongArch: Don't emit dbar 0x700 if -mld-seq-sa
+
+This option (CPUCFG word 0x3 bit 23) means "the hardware guarantees that
+two loads on the same address won't be reordered with each other".  Thus
+we can omit the "load-load" barrier dbar 0x700.
+
+This is only a micro-optimization because dbar 0x700 is already treated
+as a nop if the hardware supports LD_SEQ_SA.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand): Don't
+	print dbar 0x700 if TARGET_LD_SEQ_SA.
+	* config/loongarch/sync.md (atomic_load<mode>): Likewise.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ gcc/config/loongarch/sync.md      | 9 +++++----
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 8bd46da62..c86b787c4 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+       if (loongarch_cas_failure_memorder_needs_acquire (
+ 	    memmodel_from_int (INTVAL (op))))
+ 	fputs ("dbar\t0b10100", file);
+-      else
++      else if (!TARGET_LD_SEQ_SA)
+ 	fputs ("dbar\t0x700", file);
+       break;
+ 
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index f4673c856..65443c899 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -119,13 +119,14 @@
+     case MEMMODEL_SEQ_CST:
+       return "dbar\t0x11\\n\\t"
+ 	     "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x14\\n\\t";
++	     "dbar\t0x14";
+     case MEMMODEL_ACQUIRE:
+       return "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x14\\n\\t";
++	     "dbar\t0x14";
+     case MEMMODEL_RELAXED:
+-      return "ld.<size>\t%0,%1\\n\\t"
+-	     "dbar\t0x700\\n\\t";
++      return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1"
++	     : "ld.<size>\t%0,%1\\n\\t"
++	       "dbar\t0x700";
+ 
+     default:
+       /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
+-- 
+2.43.0
+
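In source terms, a small sketch of the case this patch affects (illustrative function name):

#include <stdint.h>

int32_t
poll_flag (const int32_t *p)
{
  /* Without -mld-seq-sa: ld.w %0,%1; dbar 0x700.
     With -mld-seq-sa (CPUCFG word 0x3 bit 23): just ld.w.  */
  return __atomic_load_n (p, __ATOMIC_RELAXED);
}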
_service:tar_scm:0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
Added
@@ -0,0 +1,208 @@ +From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:13:53 +0000 +Subject: PATCH 039/188 LoongArch: Add fine-grained control for LAM_BH and + LAMCAS + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas): + Add. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-cpu.cc + (ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH + and OPTION_MASK_ISA_LAMCAS. + * config/loongarch/sync.md (atomic_add<mode:SHORT>): Use + TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110. Remove empty + lines from assembly output. + (atomic_exchange<mode>_short): Likewise. + (atomic_exchange<mode:SHORT>): Likewise. + (atomic_fetch_add<mode>_short): Likewise. + (atomic_fetch_add<mode:SHORT>): Likewise. + (atomic_cas_value_strong<mode>_amcas): Use TARGET_LAMCAS instead + of ISA_BASE_IS_LA64V110. + (atomic_compare_and_swap<mode>): Likewise. + (atomic_compare_and_swap<mode:GPR>): Likewise. + (atomic_compare_and_swap<mode:SHORT>): Likewise. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump + status if -mlam-bh and -mlamcas if -fverbose-asm. +--- + gcc/config/loongarch/genopts/isa-evolution.in | 2 ++ + gcc/config/loongarch/loongarch-cpu.cc | 3 ++- + gcc/config/loongarch/loongarch-cpucfg-map.h | 2 ++ + gcc/config/loongarch/loongarch-str.h | 2 ++ + gcc/config/loongarch/loongarch.cc | 2 ++ + gcc/config/loongarch/loongarch.opt | 8 ++++++++ + gcc/config/loongarch/sync.md | 18 +++++++++--------- + 7 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index e58f0d6a1..a6bc3f87f 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,2 +1,4 @@ + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. ++2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. ++2 28 lamcas Support amcas_db.{b/h/w/d} instructions. + 3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 76d66fa55..bbce82c9c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see + initializers! 
*/ + + #define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA) ++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) + + int64_t loongarch_isa_base_featuresN_ISA_BASE_TYPES = { + /* ISA_BASE_LA64V100 = */ 0, +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +index 0c078c397..02ff16712 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -30,6 +30,8 @@ static constexpr struct { + HOST_WIDE_INT isa_evolution_bit; + } cpucfg_map = { + { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, ++ { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, ++ { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, + { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, + }; + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index cd9dbb41b..0fee9abe5 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + + #define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" + #define OPTSTR_LD_SEQ_SA "ld-seq-sa" + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c86b787c4..33d23a731 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void) + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings la_target.isa.base); + DUMP_FEATURE (TARGET_DIV32); ++ DUMP_FEATURE (TARGET_LAM_BH); ++ DUMP_FEATURE (TARGET_LAMCAS); + DUMP_FEATURE (TARGET_LD_SEQ_SA); + } + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 5251f705d..ea0d5bb4e 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -267,6 +267,14 @@ mdiv32 + Target Mask(ISA_DIV32) Var(isa_evolution) + Support div.wu and mod.wu instructions with inputs not sign-extended. + ++mlam-bh ++Target Mask(ISA_LAM_BH) Var(isa_evolution) ++Support am{swap/add}_db.{b/h} instructions. ++ ++mlamcas ++Target Mask(ISA_LAMCAS) Var(isa_evolution) ++Support amcas_db.{b/h/w/d} instructions. ++ + mld-seq-sa + Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) + Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 65443c899..a678e7131 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -124,7 +124,7 @@ + return "ld.<size>\t%0,%1\\n\\t" + "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t" ++ return TARGET_LD_SEQ_SA ? 
"ld.<size>\t%0,%1" + : "ld.<size>\t%0,%1\\n\\t" + "dbar\t0x700"; + +@@ -193,7 +193,7 @@ + (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A2.<amo>\t$zero,%z1,%0" + (set (attr "length") (const_int 4))) + +@@ -230,7 +230,7 @@ + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand" "r")) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amswap%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -266,7 +266,7 @@ + (match_operand:QHWD 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAMCAS" + "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" + (set (attr "length") (const_int 8))) + +@@ -296,7 +296,7 @@ + + operands6 = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -422,7 +422,7 @@ + + operands6 = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -642,7 +642,7 @@ + (match_operand:SHORT 2 "register_operand")) + "" + { +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAM_BH) + emit_insn (gen_atomic_exchange<mode>_short (operands0, operands1, operands2, operands3)); + else + { +@@ -663,7 +663,7 @@ + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -678,7 +678,7 @@ + UNSPEC_SYNC_OLD_OP)) + ""
_service:tar_scm:0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
Added
@@ -0,0 +1,50 @@
+From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 19 Nov 2023 01:41:12 +0800
+Subject: [PATCH 040/188] LoongArch: Fix "-mexplict-relocs=none
+ -mcmodel=medium" producing %call36 when the assembler does not support it
+
+Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still
+return false when -mexplicit-relocs=none -mcmodel=medium to make
+loongarch_legitimize_call_address emit la.local or la.global.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (const_call_insn_operand):
+	Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions.  Change "1" to
+	"true" to make the coding style consistent.
+---
+ gcc/config/loongarch/predicates.md | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 2aae87db4..30a0dee9f 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -444,21 +444,19 @@
+     case SYMBOL_PCREL:
+       if (TARGET_CMODEL_EXTREME
+ 	  || (TARGET_CMODEL_MEDIUM
+-	      && HAVE_AS_SUPPORT_CALL36
+ 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
+ 	return false;
+       else
+-	return 1;
++	return true;
+ 
+     case SYMBOL_GOT_DISP:
+       if (TARGET_CMODEL_EXTREME
+ 	  || !flag_plt
+ 	  || (flag_plt && TARGET_CMODEL_MEDIUM
+-	      && HAVE_AS_SUPPORT_CALL36
+ 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
+ 	return false;
+       else
+-	return 1;
++	return true;
+ 
+     default:
+       return false;
+-- 
+2.43.0
+
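The scenario being fixed, sketched in C (hypothetical symbol; the point is only which call sequence gets selected):

/* With -mexplicit-relocs=none -mcmodel=medium and an assembler that
   lacks %call36, const_call_insn_operand now rejects the symbol, so
   loongarch_legitimize_call_address loads the address with
   la.local/la.global and calls through a register instead of emitting
   a %call36 relocation the assembler cannot handle.  */
extern void far_away (void);

void
caller (void)
{
  far_away ();
}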
_service:tar_scm:0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
Added
@@ -0,0 +1,43 @@
+From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Sat, 18 Nov 2023 11:04:42 +0800
+Subject: [PATCH 041/188] LoongArch: Modify MUSL_DYNAMIC_LINKER.
+
+Use no suffix at all in the musl dynamic linker name for the hard
+float ABI.  Use the -sf and -sp suffixes in the musl dynamic linker
+name for the soft float and single precision ABIs.  The following
+table maps each LoongArch64 ABI name to its musl interpreter name.
+
+musl interpreter            | LoongArch64 ABI
+--------------------------- | -----------------
+ld-musl-loongarch64.so.1    | loongarch64-lp64d
+ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f
+ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s
+
+gcc/ChangeLog:
+
+	* config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix.
+---
+ gcc/config/loongarch/gnu-user.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 60ef75601..9fc49dc8f 100644
+--- a/gcc/config/loongarch/gnu-user.h
++++ b/gcc/config/loongarch/gnu-user.h
+@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3.  If not see
+   "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
+ 
+ #define MUSL_ABI_SPEC \
+-  "%{mabi=lp64d:-lp64d}" \
+-  "%{mabi=lp64f:-lp64f}" \
+-  "%{mabi=lp64s:-lp64s}"
++  "%{mabi=lp64d:}" \
++  "%{mabi=lp64f:-sp}" \
++  "%{mabi=lp64s:-sf}"
+ 
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER \
+-- 
+2.43.0
+
_service:tar_scm:0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
Added
@@ -0,0 +1,85 @@
+From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 21 Nov 2023 09:09:25 +0800
+Subject: [PATCH 042/188] LoongArch: Fix libgcc build failure when libc is not
+ available
+
+To use int64_t we included <stdint.h> in loongarch-def.h.
+Unfortunately, loongarch-def.h is also used by libgcc etc., causing a
+build failure when building a "stage1" cross compiler at which point
+the target libc is not built yet.
+
+As int64_t is used as a C-compatible replacement for HOST_WIDE_INT,
+it's not directly or indirectly referred to by the target libraries.
+So guard everything requiring stdint.h with #if so it won't block the
+target libraries.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h (stdint.h): Guard with #if to
+	exclude it for target libraries.
+	(loongarch_isa_base_features): Likewise.
+	(loongarch_isa): Likewise.
+	(loongarch_abi): Likewise.
+	(loongarch_target): Likewise.
+	(loongarch_cpu_default_isa): Likewise.
+---
+ gcc/config/loongarch/loongarch-def.h | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index ca0a324dd..ef848f606 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3.  If not see
+ #ifndef LOONGARCH_DEF_H
+ #define LOONGARCH_DEF_H
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ #include <stdint.h>
++#endif
++
+ #include "loongarch-tune.h"
+ 
+ #ifdef __cplusplus
+@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings[];
+ #define ISA_BASE_LA64V110 1
+ #define N_ISA_BASE_TYPES 2
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ /* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
+    we cannot use the C++ header options.h in loongarch-def.c.  */
+ extern int64_t loongarch_isa_base_features[];
++#endif
+ 
+ /* enum isa_ext_* */
+ extern const char* loongarch_isa_ext_strings[];
+@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings[];
+ #define M_OPT_ABSENT(opt_enum)  ((opt_enum) == M_OPT_UNSET)
+ 
+ 
++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
+ /* Internal representation of the target.  */
+ struct loongarch_isa
+ {
+@@ -150,6 +156,9 @@ struct loongarch_target
+   int cmodel;	   /* CMODEL_ */
+ };
+ 
++extern struct loongarch_isa loongarch_cpu_default_isa[];
++#endif
++
+ /* CPU properties.  */
+ /* index */
+ #define CPU_NATIVE	  0
+@@ -162,7 +171,6 @@ struct loongarch_target
+ 
+ /* parallel tables.  */
+ extern const char* loongarch_cpu_strings[];
+-extern struct loongarch_isa loongarch_cpu_default_isa[];
+ extern int loongarch_cpu_issue_rate[];
+ extern int loongarch_cpu_multipass_dfa_lookahead[];
+ 
+-- 
+2.43.0
+
_service:tar_scm:0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
Added
@@ -0,0 +1,148 @@ +From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 06:12:22 +0800 +Subject: PATCH 043/188 LoongArch: Optimize LSX vector shuffle on + floating-point vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The vec_perm expander was wrongly defined. GCC internal says: + +Operand 3 is the “selector”. It is an integral mode vector of the same +width and number of elements as mode M. + +But we made operand 3 in the same mode as the shuffled vectors, so it +would be a FP mode vector if the shuffled vectors are FP mode. + +With this mistake, the generic code manages to work around and it ends +up creating some very nasty code for a simple __builtin_shuffle (a, b, +c) where a and b are V4SF, c is V4SI: + + la.local $r12,.LANCHOR0 + la.local $r13,.LANCHOR1 + vld $vr1,$r12,48 + vslli.w $vr1,$vr1,2 + vld $vr2,$r12,16 + vld $vr0,$r13,0 + vld $vr3,$r13,16 + vshuf.b $vr0,$vr1,$vr1,$vr0 + vld $vr1,$r12,32 + vadd.b $vr0,$vr0,$vr3 + vandi.b $vr0,$vr0,31 + vshuf.b $vr0,$vr1,$vr2,$vr0 + vst $vr0,$r12,0 + jr $r1 + +This is obviously stupid. Fix the expander definition and adjust +loongarch_expand_vec_perm to handle it correctly. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the + selector VIMODE. + * config/loongarch/loongarch.cc (loongarch_expand_vec_perm): + Use the mode of the selector (instead of the shuffled vector) + for truncating it. Operate on subregs in the selector mode if + the shuffled vector has a different mode (i. e. it's a + floating-point vector). + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-shuf-fp.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 18 ++++++++++-------- + gcc/config/loongarch/lsx.md | 2 +- + .../gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 33d23a731..d95ac68e8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8603,8 +8603,9 @@ void + loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + { + machine_mode vmode = GET_MODE (target); ++ machine_mode vimode = GET_MODE (sel); + auto nelt = GET_MODE_NUNITS (vmode); +- auto round_reg = gen_reg_rtx (vmode); ++ auto round_reg = gen_reg_rtx (vimode); + rtx round_dataMAX_VECT_LEN; + + for (int i = 0; i < nelt; i += 1) +@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + round_datai = GEN_INT (0x1f); + } + +- rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data)); ++ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data)); + emit_move_insn (round_reg, round_data_rtx); + ++ if (vmode != vimode) ++ { ++ target = lowpart_subreg (vimode, target, vmode); ++ op0 = lowpart_subreg (vimode, op0, vmode); ++ op1 = lowpart_subreg (vimode, op1, vmode); ++ } ++ + switch (vmode) + { + case E_V16QImode: +@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); + break; + case E_V2DFmode: +- emit_insn (gen_andv2di3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); +- break; + case E_V2DImode: + emit_insn (gen_andv2di3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_d 
(target, sel, op1, op0)); + break; + case E_V4SFmode: +- emit_insn (gen_andv4si3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0)); +- break; + case E_V4SImode: + emit_insn (gen_andv4si3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 8ea41c85b..5e8d8d74b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -837,7 +837,7 @@ + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "register_operand") + (match_operand:LSX 2 "register_operand") +- (match_operand:LSX 3 "register_operand") ++ (match_operand:<VIMODE> 3 "register_operand") + "ISA_HAS_LSX" + { + loongarch_expand_vec_perm (operands0, operands1, +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +new file mode 100644 +index 000000000..7acc2113a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++/* { dg-final { scan-assembler "vshuf\.w" } } */ ++ ++#define V __attribute__ ((vector_size (16))) ++ ++int a V; ++float b V; ++float c V; ++float d V; ++ ++void ++test (void) ++{ ++ d = __builtin_shuffle (b, c, a); ++} +-- +2.43.0 +
_service:tar_scm:0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
Added
@@ -0,0 +1,112 @@ +From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001 +From: Guo Jie <guojie@loongson.cn> +Date: Thu, 23 Nov 2023 11:04:17 +0800 +Subject: PATCH 044/188 LoongArch: Optimize the loading of immediate numbers + with the same high and low 32-bit values + +For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c: + + long long r = 0x0101010101010101; + +Before this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + lu32i.d $r15,0x1010100000000>>32 + lu52i.d $r15,$r15,0x100000000000000>>52 + +After this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + bstrins.d $r15,$r15,63,32 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (enum loongarch_load_imm_method): Add new method. + (loongarch_build_integer): Add relevant implementations for + new method. + (loongarch_move_integer): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/imm-load1.c: Change old check. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++- + .../gcc.target/loongarch/imm-load1.c | 3 ++- + 2 files changed, 23 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d95ac68e8..048d3802b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -142,12 +142,16 @@ struct loongarch_address_info + + METHOD_LU52I: + Load 52-63 bit of the immediate number. ++ ++ METHOD_MIRROR: ++ Copy 0-31 bit of the immediate number to 32-63bit. + */ + enum loongarch_load_imm_method + { + METHOD_NORMAL, + METHOD_LU32I, +- METHOD_LU52I ++ METHOD_LU52I, ++ METHOD_MIRROR + }; + + struct loongarch_integer_op +@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes, + + int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; + int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; ++ ++ uint32_t hival = (uint32_t) (value >> 32); ++ uint32_t loval = (uint32_t) value; ++ + /* Determine whether the upper 32 bits are sign-extended from the lower + 32 bits. If it is, the instructions to load the high order can be + ommitted. */ + if (lu32isign31 && lu52isign31) + return cost; ++ /* If the lower 32 bits are the same as the upper 32 bits, just copy ++ the lower 32 bits to the upper 32 bits. */ ++ else if (loval == hival) ++ { ++ codescost.method = METHOD_MIRROR; ++ codescost.curr_value = value; ++ return cost + 1; ++ } + /* Determine whether bits 32-51 are sign-extended from the lower 32 + bits. If so, directly load 52-63 bits. */ + else if (lu32isign31) +@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) + gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)), + GEN_INT (codesi.value)); + break; ++ case METHOD_MIRROR: ++ gcc_assert (mode == DImode); ++ emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x)); ++ break; + default: + gcc_unreachable (); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +index 2ff029712..f64cc2956 100644 +--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c ++++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */ + + + extern long long b10; +-- +2.43.0 +
_service:tar_scm:0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
Added
@@ -0,0 +1,30 @@
+From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Thu, 23 Nov 2023 11:05:56 +0800
+Subject: [PATCH 045/188] LoongArch: Fix runtime error in a gcc build with
+ --with-build-config=bootstrap-ubsan
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_split_plus_constant):
+	Avoid left shift of the negative value -0x8000.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 048d3802b..ecceca22d 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
+   else if (loongarch_addu16i_imm12_operand_p (v, mode))
+     a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+   else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+-    a = (v > 0 ? 0x7fff : -0x8000) << 16;
++    a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
+   else
+     gcc_unreachable ();
+ 
+-- 
+2.43.0
+
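The underlying C rule, as a standalone illustration (a sketch of the reasoning, assuming a C/C++ dialect where left-shifting a negative value is undefined, which is what bootstrap-ubsan diagnoses):

#include <stdio.h>

int
main (void)
{
  /* (-0x8000) << 16 left-shifts a negative value: undefined behavior,
     flagged by -fsanitize=shift.  The complement form below computes
     the same value (-0x80000000) without any shift.  */
  long long a = ~0x7fffffffLL;
  printf ("%lld\n", a);   /* -2147483648 */
  return 0;
}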
_service:tar_scm:0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
Added
@@ -0,0 +1,1295 @@ +From d37308b7a62246e16ee61c40441548feb76761f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 18 Nov 2023 04:48:20 +0800 +Subject: PATCH 046/188 LoongArch: Fix usage of LSX and LASX frint/ftint + instructions PR112578 + +The usage LSX and LASX frint/ftint instructions had some problems: + +1. These instructions raises FE_INEXACT, which is not allowed with + -fno-fp-int-builtin-inexact for most C2x section F.10.6 functions + (the only exceptions are rint, lrint, and llrint). +2. The "frint" instruction without explicit rounding mode is used for + roundM2, this is incorrect because roundM2 is defined "rounding + operand 1 to the *nearest* integer, rounding away from zero in the + event of a tie". We actually don't have such an instruction. Our + frintrne instruction is roundevenM2 (unfortunately, this is not + documented). +3. These define_insn's are written in a way not so easy to hack. + +So I removed these instructions and created a "simd.md" file, then added +them and the corresponding expanders there. The advantage of the +simd.md file is we don't need to duplicate the RTL template twice (in +lsx.md and lasx.md). + +gcc/ChangeLog: + + PR target/112578 + * config/loongarch/lsx.md (UNSPEC_LSX_VFTINT_S, + UNSPEC_LSX_VFTINTRNE, UNSPEC_LSX_VFTINTRP, + UNSPEC_LSX_VFTINTRM, UNSPEC_LSX_VFRINTRNE_S, + UNSPEC_LSX_VFRINTRNE_D, UNSPEC_LSX_VFRINTRZ_S, + UNSPEC_LSX_VFRINTRZ_D, UNSPEC_LSX_VFRINTRP_S, + UNSPEC_LSX_VFRINTRP_D, UNSPEC_LSX_VFRINTRM_S, + UNSPEC_LSX_VFRINTRM_D): Remove. + (ILSX, FLSX): Move into ... + (VIMODE): Move into ... + (FRINT_S, FRINT_D): Remove. + (frint_pattern_s, frint_pattern_d, frint_suffix): Remove. + (lsx_vfrint_<flsxfmt>, lsx_vftint_s_<ilsxfmt>_<flsxfmt>, + lsx_vftintrne_w_s, lsx_vftintrne_l_d, lsx_vftintrp_w_s, + lsx_vftintrp_l_d, lsx_vftintrm_w_s, lsx_vftintrm_l_d, + lsx_vfrintrne_s, lsx_vfrintrne_d, lsx_vfrintrz_s, + lsx_vfrintrz_d, lsx_vfrintrp_s, lsx_vfrintrp_d, + lsx_vfrintrm_s, lsx_vfrintrm_d, + <FRINT_S:frint_pattern_s>v4sf2, + <FRINT_D:frint_pattern_d>v2df2, round<mode>2, + fix_trunc<mode>2): Remove. + * config/loongarch/lasx.md: Likewise. + * config/loongarch/simd.md: New file. + (ILSX, ILASX, FLSX, FLASX, VIMODE): ... here. + (IVEC, FVEC): New mode iterators. + (VIMODE): ... here. Extend it to work for all LSX/LASX vector + modes. + (x, wu, simd_isa, WVEC, vimode, simdfmt, simdifmt_for_f, + elebits): New mode attributes. + (UNSPEC_SIMD_FRINTRP, UNSPEC_SIMD_FRINTRZ, UNSPEC_SIMD_FRINT, + UNSPEC_SIMD_FRINTRM, UNSPEC_SIMD_FRINTRNE): New unspecs. + (SIMD_FRINT): New int iterator. + (simd_frint_rounding, simd_frint_pattern): New int attributes. + (<simd_isa>_<x>vfrint<simd_frint_rounding>_<simdfmt>): New + define_insn template for frint instructions. + (<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>): + Likewise, but for ftint instructions. + (<simd_frint_pattern><mode>2): New define_expand with + flag_fp_int_builtin_inexact checked. + (l<simd_frint_pattern><mode><vimode>2): Likewise. + (ftrunc<mode>2): New define_expand. It does not require + flag_fp_int_builtin_inexact. + (fix_trunc<mode><vimode>2): New define_insn_and_split. It does + not require flag_fp_int_builtin_inexact. + (include): Add lsx.md and lasx.md. + * config/loongarch/loongarch.md (include): Include simd.md, + instead of including lsx.md and lasx.md directly. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vftint_w_s, CODE_FOR_lsx_vftint_l_d, + CODE_FOR_lasx_xvftint_w_s, CODE_FOR_lasx_xvftint_l_d): + Remove. 
+ +gcc/testsuite/ChangeLog: + + PR target/112578 + * gcc.target/loongarch/vect-frint.c: New test. + * gcc.target/loongarch/vect-frint-no-inexact.c: New test. + * gcc.target/loongarch/vect-ftint.c: New test. + * gcc.target/loongarch/vect-ftint-no-inexact.c: New test. +--- + gcc/config/loongarch/lasx.md | 239 ----------------- + gcc/config/loongarch/loongarch-builtins.cc | 4 - + gcc/config/loongarch/loongarch.md | 7 +- + gcc/config/loongarch/lsx.md | 243 ------------------ + gcc/config/loongarch/simd.md | 213 +++++++++++++++ + .../loongarch/vect-frint-no-inexact.c | 48 ++++ + .../gcc.target/loongarch/vect-frint.c | 85 ++++++ + .../loongarch/vect-ftint-no-inexact.c | 44 ++++ + .../gcc.target/loongarch/vect-ftint.c | 83 ++++++ + 9 files changed, 475 insertions(+), 491 deletions(-) + create mode 100644 gcc/config/loongarch/simd.md + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 2e11f0612..d4a56c307 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -53,7 +53,6 @@ + UNSPEC_LASX_XVFCMP_SULT + UNSPEC_LASX_XVFCMP_SUN + UNSPEC_LASX_XVFCMP_SUNE +- UNSPEC_LASX_XVFTINT_S + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -92,12 +91,6 @@ + UNSPEC_LASX_XVEXTRINS + UNSPEC_LASX_XVMSKLTZ + UNSPEC_LASX_XVSIGNCOV +- UNSPEC_LASX_XVFTINTRNE_W_S +- UNSPEC_LASX_XVFTINTRNE_L_D +- UNSPEC_LASX_XVFTINTRP_W_S +- UNSPEC_LASX_XVFTINTRP_L_D +- UNSPEC_LASX_XVFTINTRM_W_S +- UNSPEC_LASX_XVFTINTRM_L_D + UNSPEC_LASX_XVFTINT_W_D + UNSPEC_LASX_XVFFINT_S_L + UNSPEC_LASX_XVFTINTRZ_W_D +@@ -116,14 +109,6 @@ + UNSPEC_LASX_XVFTINTRML_L_S + UNSPEC_LASX_XVFTINTRNEL_L_S + UNSPEC_LASX_XVFTINTRNEH_L_S +- UNSPEC_LASX_XVFRINTRNE_S +- UNSPEC_LASX_XVFRINTRNE_D +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRM_S +- UNSPEC_LASX_XVFRINTRM_D + UNSPEC_LASX_XVREPLVE0_Q + UNSPEC_LASX_XVPERM_W + UNSPEC_LASX_XVPERMI_Q +@@ -206,9 +191,6 @@ + ;; Only used for copy256_{u,s}.w. + (define_mode_iterator LASX_W V8SI V8SF) + +-;; Only integer modes in LASX. +-(define_mode_iterator ILASX V4DI V8SI V16HI V32QI) +- + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH V4DI V8SI V16HI) + +@@ -224,9 +206,6 @@ + ;; Only integer modes smaller than a word. + (define_mode_iterator ILASX_HB V16HI V32QI) + +-;; Only floating-point modes in LASX. +-(define_mode_iterator FLASX V4DF V8SF) +- + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W V8SI V16HI V32QI V8SF) + +@@ -500,37 +479,6 @@ + (V16HI "w") + (V32QI "w")) + +-(define_int_iterator FRINT256_S UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_S) +- +-(define_int_iterator FRINT256_D UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_D) +- +-(define_int_attr frint256_pattern_s +- (UNSPEC_LASX_XVFRINTRP_S "ceil") +- (UNSPEC_LASX_XVFRINTRZ_S "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_S "floor")) +- +-(define_int_attr frint256_pattern_d +- (UNSPEC_LASX_XVFRINTRP_D "ceil") +- (UNSPEC_LASX_XVFRINTRZ_D "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_D "floor")) +- +-(define_int_attr frint256_suffix +- (UNSPEC_LASX_XVFRINTRP_S "rp") +- (UNSPEC_LASX_XVFRINTRP_D "rp") +- (UNSPEC_LASX_XVFRINTRZ_S "rz") +- (UNSPEC_LASX_XVFRINTRZ_D "rz") +- (UNSPEC_LASX_XVFRINT "") +- (UNSPEC_LASX_XVFRINTRM_S "rm") +- (UNSPEC_LASX_XVFRINTRM_D "rm")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LASX 0 "register_operand")
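The semantic distinction in item 2 of the description above can be checked with a minimal scalar program. This is a sketch for illustration only (not part of the patch; it assumes a libm that provides the C23 roundeven function):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* round ties away from zero; frint (ties to even) cannot implement it.  */
  printf ("round (2.5)     = %g\n", round (2.5));      /* 3 */
  /* roundeven ties to even; this is what frintrne computes.  */
  printf ("roundeven (2.5) = %g\n", roundeven (2.5));  /* 2 */
  /* rint follows the current rounding mode (ties to even by default) and
     may raise FE_INEXACT, which F.10.6 permits for rint/lrint/llrint.  */
  printf ("rint (2.5)      = %g\n", rint (2.5));       /* 2 */
  return 0;
}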
View file
_service:tar_scm:0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,268 @@ +From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 16:28:59 +0800 +Subject: PATCH 047/188 LoongArch: Use standard pattern name and RTX code for + LSX/LASX muh instructions + +Removes unnecessary UNSPECs and make the muh instructions useful with +GNU vectors or auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/simd.md (muh): New code attribute mapping + any_extend to smul_highpart or umul_highpart. + (<su>mul<mode>3_highpart): New define_insn. + * config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove. + (UNSPEC_LSX_VMUH_U): Remove. + (lsx_vmuh_s_<lsxfmt>): Remove. + (lsx_vmuh_u_<lsxfmt>): Remove. + * config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove. + (UNSPEC_LASX_XVMUH_U): Remove. + (lasx_xvmuh_s_<lasxfmt>): Remove. + (lasx_xvmuh_u_<lasxfmt>): Remove. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b): + Redefine to standard pattern name. + (CODE_FOR_lsx_vmuh_h): Likewise. + (CODE_FOR_lsx_vmuh_w): Likewise. + (CODE_FOR_lsx_vmuh_d): Likewise. + (CODE_FOR_lsx_vmuh_bu): Likewise. + (CODE_FOR_lsx_vmuh_hu): Likewise. + (CODE_FOR_lsx_vmuh_wu): Likewise. + (CODE_FOR_lsx_vmuh_du): Likewise. + (CODE_FOR_lasx_xvmuh_b): Likewise. + (CODE_FOR_lasx_xvmuh_h): Likewise. + (CODE_FOR_lasx_xvmuh_w): Likewise. + (CODE_FOR_lasx_xvmuh_d): Likewise. + (CODE_FOR_lasx_xvmuh_bu): Likewise. + (CODE_FOR_lasx_xvmuh_hu): Likewise. + (CODE_FOR_lasx_xvmuh_wu): Likewise. + (CODE_FOR_lasx_xvmuh_du): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-muh.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 32 ++++++++--------- + gcc/config/loongarch/lsx.md | 22 ------------ + gcc/config/loongarch/simd.md | 16 +++++++++ + gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++ + 5 files changed, 68 insertions(+), 60 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index d4a56c307..023a023b4 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -68,8 +68,6 @@ + UNSPEC_LASX_BRANCH + UNSPEC_LASX_BRANCH_V + +- UNSPEC_LASX_XVMUH_S +- UNSPEC_LASX_XVMUH_U + UNSPEC_LASX_MXVEXTW_U + UNSPEC_LASX_XVSLLWIL_S + UNSPEC_LASX_XVSLLWIL_U +@@ -2823,26 +2821,6 @@ + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvmuh_s_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVMUH_S)) +- "ISA_HAS_LASX" +- "xvmuh.<lasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lasx_xvmuh_u_<lasxfmt_u>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVMUH_U)) +- "ISA_HAS_LASX" +- "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>" + (set (match_operand:<VDMODE256> 0 "register_operand" "=f") + (unspec:<VDMODE256> (match_operand:ILASX_WHB 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index fb458feac..41ea357cf 100644 +--- 
a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3 + #define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3 + #define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3 ++#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart ++#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart + #define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3 + #define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3 + #define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3 +@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4 + #define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4 + +-#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b +-#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h +-#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w +-#define CODE_FOR_lsx_vmuh_d CODE_FOR_lsx_vmuh_s_d +-#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu +-#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu +-#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu +-#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du + #define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b + #define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h + #define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w +@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3 + #define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3 + #define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3 ++#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart ++#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart + #define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2 + #define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2 + #define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2 +@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu + #define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu + #define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du +-#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b +-#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h +-#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w +-#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d +-#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu +-#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu +-#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu +-#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du + #define CODE_FOR_lasx_xvssran_b_h CODE_FOR_lasx_xvssran_s_b_h + #define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w + #define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c1c3719e3..537afaf96 100644 +--- 
a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -64,8 +64,6 @@ + UNSPEC_LSX_VSRLR + UNSPEC_LSX_VSRLRI + UNSPEC_LSX_VSHUF +- UNSPEC_LSX_VMUH_S +- UNSPEC_LSX_VMUH_U + UNSPEC_LSX_VEXTW_S + UNSPEC_LSX_VEXTW_U + UNSPEC_LSX_VSLLWIL_S +@@ -2506,26 +2504,6 @@ + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vmuh_s_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VMUH_S)) +- "ISA_HAS_LSX" +- "vmuh.<lsxfmt>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lsx_vmuh_u_<lsxfmt_u>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VMUH_U)) +- "ISA_HAS_LSX" +- "vmuh.<lsxfmt_u>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vextw_s_d" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V4SI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 27d1ffecd..a0e8db3c0 100644 +--- a/gcc/config/loongarch/simd.md
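What removing the UNSPECs buys in practice: the high-part multiply is now exposed through the standard <su>mul<mode>3_highpart pattern, so ordinary C can reach vmuh/xvmuh without intrinsics. A sketch in the spirit of the new vect-muh.c test (the function name and loop shape are illustrative); with -O3 -mlsx the loop is expected to vectorize to vmuh.w:

/* High 32 bits of a signed 32x32 multiply, element-wise.  */
void
mulh_loop (int *restrict r, const int *restrict a,
           const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = (int) (((long long) a[i] * b[i]) >> 32);
}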
View file
_service:tar_scm:0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,285 @@ +From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 17:28:06 +0800 +Subject: PATCH 048/188 LoongArch: Use standard pattern name and RTX code for + LSX/LASX rotate shift + +Remove unnecessary UNSPECs and make the xvrotri instructions useful +with GNU vectors and auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (bitimm): Move to ... + (UNSPEC_LSX_VROTR): Remove. + (lsx_vrotr_<lsxfmt>): Remove. + (lsx_vrotri_<lsxfmt>): Remove. + * config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove. + (lsx_vrotr_<lsxfmt>): Remove. + (lsx_vrotri_<lsxfmt>): Remove. + * config/loongarch/simd.md (bitimm): ... here. Expand it to + cover LASX modes. + (vrotr<mode>3): New define_insn. + (vrotri<mode>3): New define_insn. + * config/loongarch/loongarch-builtins.cc: + (CODE_FOR_lsx_vrotr_b): Use standard pattern name. + (CODE_FOR_lsx_vrotr_h): Likewise. + (CODE_FOR_lsx_vrotr_w): Likewise. + (CODE_FOR_lsx_vrotr_d): Likewise. + (CODE_FOR_lasx_xvrotr_b): Likewise. + (CODE_FOR_lasx_xvrotr_h): Likewise. + (CODE_FOR_lasx_xvrotr_w): Likewise. + (CODE_FOR_lasx_xvrotr_d): Likewise. + (CODE_FOR_lsx_vrotri_b): Define to standard pattern name. + (CODE_FOR_lsx_vrotri_h): Likewise. + (CODE_FOR_lsx_vrotri_w): Likewise. + (CODE_FOR_lsx_vrotri_d): Likewise. + (CODE_FOR_lasx_xvrotri_b): Likewise. + (CODE_FOR_lasx_xvrotri_h): Likewise. + (CODE_FOR_lasx_xvrotri_w): Likewise. + (CODE_FOR_lasx_xvrotri_d): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-rotr.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 16 +++++++++ + gcc/config/loongarch/lsx.md | 28 --------------- + gcc/config/loongarch/simd.md | 29 +++++++++++++++ + .../gcc.target/loongarch/vect-rotr.c | 36 +++++++++++++++++++ + 5 files changed, 81 insertions(+), 50 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 023a023b4..116b30c07 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -138,7 +138,6 @@ + UNSPEC_LASX_XVHSUBW_Q_D + UNSPEC_LASX_XVHADDW_QU_DU + UNSPEC_LASX_XVHSUBW_QU_DU +- UNSPEC_LASX_XVROTR + UNSPEC_LASX_XVADD_Q + UNSPEC_LASX_XVSUB_Q + UNSPEC_LASX_XVREPLVE +@@ -4232,18 +4231,6 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "V4DI")) + +-;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D +-;;TODO-478 +-(define_insn "lasx_xvrotr_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f") +- UNSPEC_LASX_XVROTR)) +- "ISA_HAS_LASX" +- "xvrotr.<lasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + ;;XVADD.Q + ;;TODO2 + (define_insn "lasx_xvadd_q" +@@ -4426,15 +4413,6 @@ + (set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DI")) + +-(define_insn "lasx_xvrotri_<lasxfmt>" +- (set (match_operand:ILASX 0 "register_operand" "=f") +- (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand 2 "const_<bitimm256>_operand" ""))) +- "ISA_HAS_LASX" +- "xvrotri.<lasxfmt>\t%u0,%u1,%2" +- (set_attr "type" "simd_shf") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvextl_q_d" + (set (match_operand:V4DI 0 "register_operand" "=f") + (unspec:V4DI (match_operand:V4DI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc +index 41ea357cf..f4523c8bf 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3 + #define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3 + #define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3 ++#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3 ++#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3 ++#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3 ++#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3 ++#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3 ++#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3 ++#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3 ++#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3 + #define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3 + #define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3 + #define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3 +@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3 + #define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3 + #define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3 ++#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3 ++#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3 ++#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3 ++#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3 ++#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3 ++#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3 ++#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3 ++#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3 + #define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3 + #define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3 + #define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3 +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 537afaf96..232399934 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -141,7 +141,6 @@ + UNSPEC_LSX_VMADDWOD + UNSPEC_LSX_VMADDWOD2 + UNSPEC_LSX_VMADDWOD3 +- UNSPEC_LSX_VROTR + UNSPEC_LSX_VADD_Q + UNSPEC_LSX_VSUB_Q + UNSPEC_LSX_VEXTH_Q_D +@@ -363,14 +362,6 @@ + (V8HI "exp_8") + (V16QI "exp_16")) + +-;; This attribute is used to form an immediate operand constraint using +-;; "const_<bitimm>_operand". 
+-(define_mode_attr bitimm +- (V16QI "uimm3") +- (V8HI "uimm4") +- (V4SI "uimm5") +- (V2DI "uimm6")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "") +@@ -4152,16 +4143,6 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DI")) + +-(define_insn "lsx_vrotr_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f") +- UNSPEC_LSX_VROTR)) +- "ISA_HAS_LSX" +- "vrotr.<lsxfmt>\t%w0,%w1,%w2" +- (set_attr "type" "simd_int_arith") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vadd_q" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V2DI 1 "register_operand" "f") +@@ -4255,15 +4236,6 @@ + (set_attr "type" "simd_fcvt") + (set_attr "mode" "V2DI")) + +-(define_insn "lsx_vrotri_<lsxfmt>" +- (set (match_operand:ILSX 0 "register_operand" "=f") +- (rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand 2 "const_<bitimm>_operand" ""))) +- "ISA_HAS_LSX" +- "vrotri.<lsxfmt>\t%w0,%w1,%2" +- (set_attr "type" "simd_shf") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vextl_q_d" + (set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI (match_operand:V2DI 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index a0e8db3c0..4ecf7a55e 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -91,6 +91,13 @@ + (V8HI "16") (V16HI "16")
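Similarly, the rotate patterns become reachable from the usual C rotate idiom, which GCC already recognizes. A sketch (illustrative only, in the spirit of the new vect-rotr.c test) that should map to vrotri.w once the loop is vectorized with -O3 -mlsx:

/* Rotate each element right by 3; the shift/or pair is the idiom GCC
   folds into a single rotate.  */
void
rotr3 (unsigned *restrict r, const unsigned *restrict a, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = (a[i] >> 3) | (a[i] << 29);
}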
View file
_service:tar_scm:0049-LoongArch-Remove-lrint_allow_inexact.patch
Added
@@ -0,0 +1,42 @@
+From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 01:34:26 +0800
+Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact
+
+No functional change, just a cleanup.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (lrint_allow_inexact): Remove.
+	(<lrint_pattern><ANYF:mode><ANYFI:mode>2): Check if <LRINT>
+	== UNSPEC_FTINT instead of <lrint_allow_inexact>.
+---
+ gcc/config/loongarch/loongarch.md | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index d1c766cbf..11577f407 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -585,9 +585,6 @@
+ (define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+				     (UNSPEC_FTINTRM "rm")
+				     (UNSPEC_FTINTRP "rp")])
+-(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+-				      (UNSPEC_FTINTRM "0")
+-				      (UNSPEC_FTINTRP "0")])
+ 
+ ;; Iterator and attributes for bytepick.d
+ (define_int_iterator bytepick_w_ashift_amount [8 16 24])
+@@ -2384,7 +2381,7 @@
+	(unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+		      LRINT))]
+   "TARGET_HARD_FLOAT &&
+-   (<lrint_allow_inexact>
++   (<LRINT> == UNSPEC_FTINT
+    || flag_fp_int_builtin_inexact
+    || !flag_trapping_math)"
+  "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+-- 
+2.43.0
+
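The rewritten condition encodes the C2x F.10.6 rule already quoted in patch 046: lrint may raise FE_INEXACT unconditionally, while lfloor/lceil expansions may not unless -ffp-int-builtin-inexact (or -fno-trapping-math) is in effect. A hedged sketch of the user-visible effect (function names are illustrative):

long
do_lrint (double x)
{
  /* Expected to stay a single ftint.l.d regardless of the flags:
     lrint is one of the F.10.6 exceptions.  */
  return __builtin_lrint (x);
}

long
do_lfloor (double x)
{
  /* Expected to be a single ftintrm.l.d under the default
     -ffp-int-builtin-inexact, but a floor call plus a conversion
     with -fno-fp-int-builtin-inexact -ftrapping-math.  */
  return (long) __builtin_floor (x);
}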
View file
_service:tar_scm:0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
Added
@@ -0,0 +1,150 @@
+From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 03:51:56 +0800
+Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
+ explicit rounding mode
+
+In the LoongArch base FP ISA there is only the frint.{s/d} instruction,
+which reads the global rounding mode.  Utilize LSX for an explicit
+rounding mode even if the operand is scalar.  It may seem to waste CPU
+power, but it is still much faster than calling the library function.
+
+gcc/ChangeLog:
+
+	* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
+	(VLSX_FOR_FMODE): New mode attribute.
+	(<simd_for_scalar_frint_pattern><mode>2): New expander,
+	expanding to vreplvei.{w/d} + frint{rp/rz/rm/rne}.{s.d}.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-frint-scalar.c: New test.
+	* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
+---
+ gcc/config/loongarch/simd.md                  | 28 ++++++++++++
+ .../loongarch/vect-frint-scalar-no-inexact.c  | 23 ++++++++++
+ .../gcc.target/loongarch/vect-frint-scalar.c  | 43 +++++++++++++++++++
+ 3 files changed, 94 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 4ecf7a55e..843b1a41f 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -169,6 +169,34 @@
+    UNSPEC_SIMD_FRINTRZ))]
+   "")
+ 
++;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
++;; builtin-inexact.  The base FP instruction set lacks these operations.
++;; Yes, we are wasting 50% or even 75% of the CPU horsepower, but it's
++;; still much faster than calling a libc function: on LA464 and LA664
++;; there is a 3x ~ 5x speed up.
++;;
++;; Note that a vreplvei instruction is needed or we'll also operate on the
++;; junk in the high bits of the vector register and produce random FP
++;; exceptions.
++
++(define_int_iterator LSX_SCALAR_FRINT
++  [UNSPEC_SIMD_FRINTRP
++   UNSPEC_SIMD_FRINTRZ
++   UNSPEC_SIMD_FRINTRM
++   UNSPEC_SIMD_FRINTRNE])
++
++(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
++
++(define_expand "<simd_frint_pattern><mode>2"
++  [(set (match_dup 2)
++	(vec_duplicate:<VLSX_FOR_FMODE>
++	  (match_operand:ANYF 1 "register_operand")))
++   (set (match_dup 2)
++	(unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
++   (set (match_operand:ANYF 0 "register_operand")
++	(vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
++  "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
++
+ ;; <x>vftint.{/rp/rz/rm}
+ (define_insn
+  "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+new file mode 100644
+index 000000000..002e3b92d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
++
++#include "vect-frint-scalar.c"
++
++/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
++   call library function.  */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
++
++/* nearbyint has not been allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use the basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+new file mode 100644
+index 000000000..c7cb40be7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+@@ -0,0 +1,43 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx" } */
++
++#define test(func, suffix) \
++__typeof__ (1.##suffix) \
++_##func##suffix (__typeof__ (1.##suffix) x) \
++{ \
++  return __builtin_##func##suffix (x); \
++}
++
++test (ceil, f)
++test (ceil, )
++test (floor, f)
++test (floor, )
++test (trunc, f)
++test (trunc, )
++test (roundeven, f)
++test (roundeven, )
++test (nearbyint, f)
++test (nearbyint, )
++test (rint, f)
++test (rint, )
++
++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
++
++/* must do vreplvei first */
++/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
++/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
++
++/* nearbyint has not been allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use the basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+-- 
+2.43.0
+
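Putting the expander and its tests together, the expected code shape for a scalar ceil at -O2 -mlsx is roughly the following (a sketch: the function name is illustrative and the exact register allocation may differ):

double
my_ceil (double x)
{
  return __builtin_ceil (x);
  /* Anticipated assembly, matching the vect-frint-scalar.c scans:
       vreplvei.d  $vr0, $vr0, 0   # splat to avoid junk in high lanes
       vfrintrp.d  $vr0, $vr0      # round toward +inf
     The scalar result is then simply lane 0 of $vr0.  */
}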
View file
_service:tar_scm:0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
Added
@@ -0,0 +1,49 @@
+From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:56:35 +0800
+Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of
+ CLZ_DEFINED_VALUE_AT_ZERO.
+
+In the r14-5547 commit, CLZ_DEFINED_VALUE_AT_ZERO and
+CTZ_DEFINED_VALUE_AT_ZERO were defined at the same time, but in fact,
+CLZ_DEFINED_VALUE_AT_ZERO had already been defined, so remove the
+duplicate definition.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add
+	description.
+	(CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition.
+---
+ gcc/config/loongarch/loongarch.h | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 19cf6fd33..8b28be0e4 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3.  If not see
+ /* Define if loading short immediate values into registers sign extends.  */
+ #define SHORT_IMMEDIATES_SIGN_EXTEND 1
+ 
+-/* The clz.{w/d} instructions have the natural values at 0.  */
++/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0.  */
+ 
+ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+ 
+ /* Standard register usage.  */
+ 
+@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function
+ 
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+-
+-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-- 
+2.43.0
+
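A returned value of 2 from these macros tells the middle end that the zero-input result (the mode's bit width) holds for the RTL clz/ctz patterns as well, so a source-level zero guard can fold away. An illustrative sketch (the function name is not from the patch):

int
count_leading (unsigned int x)
{
  /* __builtin_clz (0) is undefined at the C level, but because clz.w
     returns 32 for a zero input and the macro says so, GCC can fold
     this whole expression into a single branch-free clz.w.  */
  return x ? __builtin_clz (x) : 32;
}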
View file
_service:tar_scm:0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
Added
@@ -0,0 +1,4375 @@ +From 8d5c983efc35804f98823e203eada6263dd1604e Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 28 Nov 2023 16:23:53 +0800 +Subject: PATCH 052/188 LoongArch: Added vectorized hardware inspection for + testsuite. + +When GCC regression tests are executed on a cpu that does not support +vectorization, the loongarch/vector directory will have some FAIL entries for +all test cases related to vectorization runs. In order to solve this kind +of problem, a vectorized hardware detection function was added to the code, +which can only be compiled but not run. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c:Remove + the default Settings to run the behavior. + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadda.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvand.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitset.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclo.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextrins.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffinth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftintl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvh.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvld.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvldi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsub.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmul.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvneg.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvorn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvprem.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseq.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseqi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsll.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsra.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrai.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsran.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrar.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrari.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrln.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssran.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c:Dito.
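The detection the description refers to boils down to a tiny probe whose compilation (but not execution) decides how the vector tests are treated, so hosts without LASX hardware no longer record spurious FAILs for run tests. A hypothetical sketch of such a probe; the real checks live in the DejaGnu support files and their exact form is an assumption here:

/* Hypothetical probe: it only needs to assemble, not run, for the
   toolchain side of the check; executing it would additionally prove
   the CPU supports LSX.  */
int
main (void)
{
  __asm__ volatile ("vadd.b $vr0,$vr0,$vr0");
  return 0;
}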
View file
_service:tar_scm:0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Added
@@ -0,0 +1,148 @@
+From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:38:37 +0800
+Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
+ signed/unsigned popcount.
+
+In LoongArch, the vector popcount has corresponding instructions, while
+the scalar one does not.  Currently, the scalar popcount is calculated
+through a loop, and a value that is not a power of two needs several
+iterations, so the vector popcount instruction is considered for
+optimization.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (v2di): Used to simplify the
+	following templates.
+	(popcount<mode>2): New.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/popcnt.c: New test.
+	* gcc.target/loongarch/popcount.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 27 +++++++++++-
+ gcc/testsuite/gcc.target/loongarch/popcnt.c   | 41 +++++++++++++++++++
+ gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
+ 3 files changed, 83 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 11577f407..cfd7a8ec6 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1512,7 +1512,30 @@
+   (set_attr "cnv_mode"	"D2S")
+   (set_attr "mode" "SF"))
+ 
+-
++;; In vector registers, popcount can be implemented directly through
++;; the vector instruction XVPCNT.  For GP registers, we can implement
++;; it through the following method.  Compared with a loop implementation
++;; of popcount, the following method has better performance.
++
++;; This attribute is used to connect a scalar mode with the corresponding
++;; vector mode.
++(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
++
++(define_expand "popcount<mode>2"
++  [(set (match_operand:GPR 0 "register_operand")
++	(popcount:GPR (match_operand:GPR 1 "register_operand")))]
++  "ISA_HAS_LSX"
++{
++  rtx in = operands[1];
++  rtx out = operands[0];
++  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
++				    gen_reg_rtx (V2DImode);
++  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
++  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
++  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
++  DONE;
++})
++
+ ;;
+ ;;  ....................
+ ;; +@@ -3879,7 +3902,7 @@ + (any_extend:SI (match_dup 3)))) + "") + +- ++ + + (define_mode_iterator QHSD QI HI SI DI) + +diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c +new file mode 100644 +index 000000000..a10fca420 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c +@@ -0,0 +1,41 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-not {popcount} } } */ ++/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */ ++ ++int ++foo (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++long ++foo1 (long x) ++{ ++ return __builtin_popcountl (x); ++} ++ ++long long ++foo2 (long long x) ++{ ++ return __builtin_popcountll (x); ++} ++ ++int ++foo3 (int *p) ++{ ++ return __builtin_popcount (*p); ++} ++ ++unsigned ++foo4 (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++unsigned long ++foo5 (int x) ++{ ++ return __builtin_popcount (x); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c +new file mode 100644 +index 000000000..390ff0676 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcount.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */ ++ ++int ++PopCount (long b) ++{ ++ int c = 0; ++ ++ while (b) ++ { ++ b &= b - 1; ++ c++; ++ } ++ ++ return c; ++} +-- +2.43.0 +
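For reference, the scalar-to-vector round trip the expander above emits looks like this (a sketch assuming the LP64 argument register $r4; lane numbers follow the GEN_INT operands in the expander):

long
popcnt64 (long x)
{
  return __builtin_popcountl (x);
  /* Expected with -O2 -mlsx (sketch):
       vinsgr2vr.d   $vr0, $r4, 0   # insert the scalar into lane 0
       vpcnt.d       $vr0, $vr0     # vector population count
       vpickve2gr.d  $r4, $vr0, 0   # extract the result
  */
}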
View file
_service:tar_scm:0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
Added
@@ -0,0 +1,163 @@
+From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:39:00 +0800
+Subject: [PATCH 054/188] LoongArch: Optimize vector constant
+ extract-{even/odd} permutation.
+
+For vector constant extract-{even/odd} permutations, replace the default
+xvshuf instruction combination with the xvilv{l/h} instructions, which
+reduces the instruction count and improves performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_is_odd_extraction):
+	Supplementary function prototype.
+	(loongarch_is_even_extraction): Adjust.
+	(loongarch_try_expand_lsx_vshuf_const): Adjust.
+	(loongarch_is_extraction_permutation): Adjust.
+	(loongarch_expand_vec_perm_const_2): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 33 +++++++++++-
+ .../loongarch/lasx-extract-even_odd-opt.c     | 54 +++++++++++++++++++
+ 2 files changed, 85 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index ecceca22d..3ef7e3605 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
+     }
+ }
+ 
++static bool
++loongarch_is_odd_extraction (struct expand_vec_perm_d *);
++
++static bool
++loongarch_is_even_extraction (struct expand_vec_perm_d *);
++
+ static bool
+ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+ {
+@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+   if (d->testing_p)
+     return true;
+ 
++  /* If the permutation matches the extract-even or extract-odd pattern,
++     vselect is much better than vshuf.  */
++  if (loongarch_is_odd_extraction (d)
++      || loongarch_is_even_extraction (d))
++    {
++      if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
++					    d->perm, d->nelt))
++	return true;
++
++      unsigned char perm2[MAX_VECT_LEN];
++      for (i = 0; i < d->nelt; ++i)
++	perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
++
++      if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
++					    perm2, d->nelt))
++	return true;
++    }
++
+   for (i = 0; i < d->nelt; i += 1)
+     {
+       rperm[i] = GEN_INT (d->perm[i]);
+@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 1;
++      buf += 2;
+     }
+ 
+   return result;
+@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 2;
++      buf += 1;
+     }
+ 
+   return result;
+@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+      Selector after: { 1, 3, 1, 3 }.
+      Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
+      Selector after: { 0, 2, 0, 2 }.  */
++
++  /* Better implementation of extract-even and extract-odd permutations.  */
++  if (loongarch_expand_vec_perm_even_odd (d))
++    return true;
++
+   for (i = 0; i < d->nelt / 2; i += 1)
+     {
+       idx = d->perm[i];
+diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+new file mode 100644
+index 000000000..515f0c862
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvilvl.d" } } */
++/* { dg-final { scan-assembler "xvilvh.d" } } */
++
++#define CMUL(a, b, c) \
++  { \
++    (c).ai = (a).ai * (b).ai - (a).bi * (b).bi; \
++    (c).bi = (a).ai * (b).bi + (a).bi * (b).ai; \
++    (c).ci = (a).ci * (b).ci - (a).di * (b).di; \
++    (c).di = (a).ci * (b).di + (a).di * (b).ci; \
++  }
++#define CSUM(a, b) \
++  { \
++    (a).ai += (b).ai; \
++    (a).bi += (b).bi; \
++    (a).ci += (b).ci; \
++    (a).di += (b).di; \
++  }
++
++typedef struct
++{
++  double ai;
++  double bi;
++  double ci;
++  double di;
++} complex;
++
++typedef struct
++{
++  complex e[6][6];
++} matrix;
++
++typedef struct
++{
++  complex c[6];
++} vector;
++
++void
++mult_adj_mat_vec (matrix *a, vector *b, vector *c)
++{
++  register int i, j;
++  register complex x, y;
++  for (i = 0; i < 6; i++)
++    {
++      x.ai = x.bi = x.ci = x.di = 0.0;
++      for (j = 0; j < 6; j++)
++	{
++	  CMUL (a->e[j][i], b->c[j], y);
++	  CSUM (x, y);
++	}
++      c->c[i] = x;
++    }
++}
+-- 
+2.43.0
+
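The permutation shape this targets can also be written directly with GNU vector shuffles. A sketch (the type name is illustrative, and the exact instruction selection depends on the mode) of an even extraction that this patch steers away from the generic xvshuf sequence:

typedef double v4f64 __attribute__ ((vector_size (32)));

/* Even extraction: even lanes of a followed by even lanes of b,
   i.e. the constant selector { 0, 2, 4, 6 }.  */
v4f64
extract_even (v4f64 a, v4f64 b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}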
View file
_service:tar_scm:0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
Added
@@ -0,0 +1,1697 @@ +From 548322a75cdeb96960fb9d324a2abf8735c4d254 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 7 Nov 2023 11:53:39 +0800 +Subject: PATCH 055/188 LoongArch: Add intrinsic function descriptions for + LSX and LASX instructions to doc. + +gcc/ChangeLog: + + * doc/extend.texi: Add information about the intrinsic function of the vector + instruction. +--- + gcc/doc/extend.texi | 1662 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1662 insertions(+) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 497c6de5f..7edd3974d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -14679,6 +14679,8 @@ instructions, but allow the compiler to schedule those calls. + * BPF Built-in Functions:: + * FR-V Built-in Functions:: + * LoongArch Base Built-in Functions:: ++* LoongArch SX Vector Intrinsics:: ++* LoongArch ASX Vector Intrinsics:: + * MIPS DSP Built-in Functions:: + * MIPS Paired-Single Support:: + * MIPS Loongson Built-in Functions:: +@@ -16262,6 +16264,1666 @@ Returns the value that is currently set in the @samp{tp} register. + void * __builtin_thread_pointer (void) + @end smallexample + ++@node LoongArch SX Vector Intrinsics ++@subsection LoongArch SX Vector Intrinsics ++ ++GCC provides intrinsics to access the LSX (Loongson SIMD Extension) instructions. ++The interface is made available by including @code{<lsxintrin.h>} and using ++@option{-mlsx}. ++ ++The following vectors typedefs are included in @code{lsxintrin.h}: ++ ++@itemize ++@item @code{__m128i}, a 128-bit vector of fixed point; ++@item @code{__m128}, a 128-bit vector of single precision floating point; ++@item @code{__m128d}, a 128-bit vector of double precision floating point. ++@end itemize ++ ++Instructions and corresponding built-ins may have additional restrictions and/or ++input/output values manipulated: ++@itemize ++@item @code{imm0_1}, an integer literal in range 0 to 1; ++@item @code{imm0_3}, an integer literal in range 0 to 3; ++@item @code{imm0_7}, an integer literal in range 0 to 7; ++@item @code{imm0_15}, an integer literal in range 0 to 15; ++@item @code{imm0_31}, an integer literal in range 0 to 31; ++@item @code{imm0_63}, an integer literal in range 0 to 63; ++@item @code{imm0_127}, an integer literal in range 0 to 127; ++@item @code{imm0_255}, an integer literal in range 0 to 255; ++@item @code{imm_n16_15}, an integer literal in range -16 to 15; ++@item @code{imm_n128_127}, an integer literal in range -128 to 127; ++@item @code{imm_n256_255}, an integer literal in range -256 to 255; ++@item @code{imm_n512_511}, an integer literal in range -512 to 511; ++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023; ++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047. ++@end itemize ++ ++For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and ++@code{__lsx_bnz_@{v/b/h/w/d@}}, which are implemented as follows: ++ ++@smallexample ++a. @code{__lsx_vrepli_@{b/h/w/d@}}: Implemented the case where the highest ++ bit of @code{vldi} instruction @code{i13} is 1. ++ ++ i1312 == 1'b0 ++ case i1311:10 of : ++ 2'b00: __lsx_vrepli_b (imm_n512_511) ++ 2'b01: __lsx_vrepli_h (imm_n512_511) ++ 2'b10: __lsx_vrepli_w (imm_n512_511) ++ 2'b11: __lsx_vrepli_d (imm_n512_511) ++ ++b. @code{__lsx_bnz_@{v/b/h/w/d@}}: Since the @code{vseteqz} class directive ++ cannot be used on its own, this function is defined. 
++ ++ _lsx_bz_v => vseteqz.v + bcnez ++ _lsx_bnz_v => vsetnez.v + bcnez ++ _lsx_bz_b => vsetanyeqz.b + bcnez ++ _lsx_bz_h => vsetanyeqz.h + bcnez ++ _lsx_bz_w => vsetanyeqz.w + bcnez ++ _lsx_bz_d => vsetanyeqz.d + bcnez ++ _lsx_bnz_b => vsetallnez.b + bcnez ++ _lsx_bnz_h => vsetallnez.h + bcnez ++ _lsx_bnz_w => vsetallnez.w + bcnez ++ _lsx_bnz_d => vsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lsx_bz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For directives where the intent operand is also the source operand ++(modifying only part of the bitfield of the intent register), the first parameter ++in the builtin call function is used as the intent operand. ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{dst}; ++ extern int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lsx_vinsgr2vr_b (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++The intrinsics provided are listed below: ++@smallexample ++int __lsx_bnz_b (__m128i); ++int __lsx_bnz_d (__m128i); ++int __lsx_bnz_h (__m128i); ++int __lsx_bnz_v (__m128i); ++int __lsx_bnz_w (__m128i); ++int __lsx_bz_b (__m128i); ++int __lsx_bz_d (__m128i); ++int __lsx_bz_h (__m128i); ++int __lsx_bz_v (__m128i); ++int __lsx_bz_w (__m128i); ++__m128i __lsx_vabsd_b (__m128i, __m128i); ++__m128i __lsx_vabsd_bu (__m128i, __m128i); ++__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_du (__m128i, __m128i); ++__m128i __lsx_vabsd_h (__m128i, __m128i); ++__m128i __lsx_vabsd_hu (__m128i, __m128i); ++__m128i __lsx_vabsd_w (__m128i, __m128i); ++__m128i __lsx_vabsd_wu (__m128i, __m128i); ++__m128i __lsx_vadda_b (__m128i, __m128i); ++__m128i __lsx_vadda_d (__m128i, __m128i); ++__m128i __lsx_vadda_h (__m128i, __m128i); ++__m128i __lsx_vadda_w (__m128i, __m128i); ++__m128i __lsx_vadd_b (__m128i, __m128i); ++__m128i __lsx_vadd_d (__m128i, __m128i); ++__m128i __lsx_vadd_h (__m128i, __m128i); ++__m128i __lsx_vaddi_bu (__m128i, imm0_31); ++__m128i __lsx_vaddi_du (__m128i, imm0_31); ++__m128i __lsx_vaddi_hu (__m128i, imm0_31); ++__m128i __lsx_vaddi_wu (__m128i, imm0_31); ++__m128i __lsx_vadd_q (__m128i, __m128i); ++__m128i __lsx_vadd_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vandi_b 
(__m128i, imm0_255); ++__m128i __lsx_vandn_v (__m128i, __m128i); ++__m128i __lsx_vand_v (__m128i, __m128i); ++__m128i __lsx_vavg_b (__m128i, __m128i); ++__m128i __lsx_vavg_bu (__m128i, __m128i); ++__m128i __lsx_vavg_d (__m128i, __m128i); ++__m128i __lsx_vavg_du (__m128i, __m128i); ++__m128i __lsx_vavg_h (__m128i, __m128i); ++__m128i __lsx_vavg_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_b (__m128i, __m128i); ++__m128i __lsx_vavgr_bu (__m128i, __m128i); ++__m128i __lsx_vavgr_d (__m128i, __m128i); ++__m128i __lsx_vavgr_du (__m128i, __m128i); ++__m128i __lsx_vavgr_h (__m128i, __m128i);
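The prototype inventory above (truncated here) is what the new documentation enumerates. A minimal use of the interface, compiled with -mlsx and using only names documented above, looks like this sketch:

#include <lsxintrin.h>

/* Lane-wise addition of four 32-bit integers.  */
__m128i
add4 (__m128i a, __m128i b)
{
  return __lsx_vadd_w (a, b);
}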
View file
_service:tar_scm:0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
Added
@@ -0,0 +1,925 @@ +From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 1 Dec 2023 10:09:33 +0800 +Subject: PATCH 056/188 LoongArch: Switch loongarch-def from C to C++ to make + it possible. + +We'll use HOST_WIDE_INT in LoongArch static properties in following patches. + +To keep the same readability as C99 designated initializers, create a +std::array like data structure with position setter function, and add +field setter functions for structs used in loongarch-def.cc. + +Remove unneeded guards #if +!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +in loongarch-def.h and loongarch-opts.h. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Remove extern "C". + (loongarch_isa_base_strings): Declare as loongarch_def_array + instead of plain array. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (loongarch_cpu_strings): Likewise. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_isa): Add a constructor and field setter functions. + * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not + include for target libraries. + * config/loongarch/loongarch-opts.cc: Comment code that doesn't + run and causes compilation errors. + * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. + (struct loongarch_rtx_cost_data): Likewise. + (struct loongarch_cache): Likewise. + (struct loongarch_align): Likewise. + * config/loongarch/t-loongarch: Compile loongarch-def.cc with the + C++ compiler. + * config/loongarch/loongarch-def-array.h: New file for a + std:array like data structure with position setter function. + * config/loongarch/loongarch-def.c: Rename to ... + * config/loongarch/loongarch-def.cc: ... here. + (loongarch_cpu_strings): Define as loongarch_def_array instead + of plain array. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_isa_base_strings): Likewise. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (abi_minimal_isa): Likewise. + (loongarch_rtx_cost_optimize_size): Use field setter functions + instead of designated initializers. + (loongarch_rtx_cost_data): Implement default constructor. 
+---
+ gcc/config/loongarch/loongarch-def-array.h |  40 ++++
+ gcc/config/loongarch/loongarch-def.c       | 227 ---------------------
+ gcc/config/loongarch/loongarch-def.cc      | 187 +++++++++++++++++
+ gcc/config/loongarch/loongarch-def.h       |  55 ++---
+ gcc/config/loongarch/loongarch-opts.cc     |   7 +
+ gcc/config/loongarch/loongarch-opts.h      |   5 +-
+ gcc/config/loongarch/loongarch-tune.h      | 123 ++++++++++-
+ gcc/config/loongarch/t-loongarch           |   4 +-
+ 8 files changed, 390 insertions(+), 258 deletions(-)
+ create mode 100644 gcc/config/loongarch/loongarch-def-array.h
+ delete mode 100644 gcc/config/loongarch/loongarch-def.c
+ create mode 100644 gcc/config/loongarch/loongarch-def.cc
+
+diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h
+new file mode 100644
+index 000000000..bdb3e9c6a
+--- /dev/null
++++ b/gcc/config/loongarch/loongarch-def-array.h
+@@ -0,0 +1,40 @@
++/* A std::array like data structure for LoongArch static properties.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++#ifndef _LOONGARCH_DEF_ARRAY_H
++#define _LOONGARCH_DEF_ARRAY_H 1
++
++template <class T, int N>
++class loongarch_def_array {
++private:
++  T arr[N];
++public:
++  loongarch_def_array () : arr{} {}
++
++  T &operator[] (int n) { return arr[n]; }
++  const T &operator[] (int n) const { return arr[n]; }
++
++  loongarch_def_array set (int idx, T &&value)
++  {
++    (*this)[idx] = value;
++    return *this;
++  }
++};
++
++#endif
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+deleted file mode 100644
+index fe4474e77..000000000
+--- a/gcc/config/loongarch/loongarch-def.c
++++ /dev/null
+@@ -1,227 +0,0 @@
+-/* LoongArch static properties.
+-   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+-   Contributed by Loongson Ltd.
+-
+-This file is part of GCC.
+-
+-GCC is free software; you can redistribute it and/or modify
+-it under the terms of the GNU General Public License as published by
+-the Free Software Foundation; either version 3, or (at your option)
+-any later version.
+-
+-GCC is distributed in the hope that it will be useful,
+-but WITHOUT ANY WARRANTY; without even the implied warranty of
+-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-GNU General Public License for more details.
+-
+-You should have received a copy of the GNU General Public License
+-along with GCC; see the file COPYING3.  If not see
+-<http://www.gnu.org/licenses/>.  */
+-
+-#include "loongarch-def.h"
+-#include "loongarch-str.h"
+-
+-/* CPU property tables.  */
+-const char*
+-loongarch_cpu_strings[N_TUNE_TYPES] = {
+-  [CPU_NATIVE] = STR_CPU_NATIVE,
+-  [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
+-  [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
+-  [CPU_LA464] = STR_CPU_LA464,
+-  [CPU_LA664] = STR_CPU_LA664,
+-};
+-
+-struct loongarch_isa
+-loongarch_cpu_default_isa[N_ARCH_TYPES] = {
+-  [CPU_LOONGARCH64] = {
+-    .base = ISA_BASE_LA64V100,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = 0,
+-  },
+-  [CPU_LA464] = {
+-    .base = ISA_BASE_LA64V100,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = ISA_EXT_SIMD_LASX,
+-  },
+-  [CPU_LA664] = {
+-    .base = ISA_BASE_LA64V110,
+-    .fpu = ISA_EXT_FPU64,
+-    .simd = ISA_EXT_SIMD_LASX,
+-  },
+-};
+-
+-struct loongarch_cache
+-loongarch_cpu_cache[N_TUNE_TYPES] = {
+-  [CPU_LOONGARCH64] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
+-    .l2d_size = 256,
+-    .simultaneous_prefetches = 4,
+-  },
+-  [CPU_LA464] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
+-    .l2d_size = 256,
+-    .simultaneous_prefetches = 4,
+-  },
+-  [CPU_LA664] = {
+-    .l1d_line_size = 64,
+-    .l1d_size = 64,
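To see how the chained setters substitute for C99 designated initializers once the file is compiled as C++, here is a self-contained sketch (illustrative; the enum, names and values are hypothetical, only the class shape follows loongarch-def-array.h):

template <class T, int N>
class def_array
{
private:
  T arr[N];
public:
  def_array () : arr{} {}
  T &operator[] (int n) { return arr[n]; }
  def_array set (int idx, T &&value)
  {
    (*this)[idx] = value;  /* write one slot...                   */
    return *this;          /* ...and return a copy so calls chain */
  }
};

enum { K_A = 0, K_B = 1, N_KEYS = 2 };

/* C99: const char *strs[N_KEYS] = { [K_A] = "a", [K_B] = "b" }; */
static def_array<const char *, N_KEYS> strs
  = def_array<const char *, N_KEYS> ().set (K_A, "a").set (K_B, "b");

Returning the updated array by value is what keeps each table a single initializing expression, which is why the setters can read almost as positionally as the designated initializers they replace.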
_service:tar_scm:0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
Added
@@ -0,0 +1,261 @@ +From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 1 Dec 2023 11:51:51 +0800 +Subject: PATCH 057/188 LoongArch: Remove the definition of ISA_BASE_LA64V110 + from the code. + +The instructions defined in LoongArch Reference Manual v1.1 are not the instruction +set v1.1 version. The CPU defined later may only support some instructions in +LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and +related definitions are removed here. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. + (fill_native_cpu_config): Define a new variable hw_isa_evolution record the + extended instruction set support read from cpucfg. + * config/loongarch/loongarch-def.cc: Set evolution at initialization. + * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. + (ISA_BASE_LA64V110): Likewise. + (N_ISA_BASE_TYPES): Likewise. + (defined): Likewise. + * config/loongarch/loongarch-opts.cc: Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. + (ISA_BASE_IS_LA64V110): Likewise. + * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. + * config/loongarch/loongarch.opt: Regenerate. +--- + .../loongarch/genopts/loongarch-strings | 1 - + gcc/config/loongarch/genopts/loongarch.opt.in | 3 --- + gcc/config/loongarch/loongarch-cpu.cc | 23 +++++-------------- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++---- + gcc/config/loongarch/loongarch-def.h | 12 ++-------- + gcc/config/loongarch/loongarch-opts.cc | 3 --- + gcc/config/loongarch/loongarch-opts.h | 4 +--- + gcc/config/loongarch/loongarch-str.h | 1 - + gcc/config/loongarch/loongarch.opt | 3 --- + 9 files changed, 19 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 6c8a42af2..411ad5696 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,7 +30,6 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +-STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a49de07c9..cd5e75e4f 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,9 +32,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index bbce82c9c..7e0625835 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see + #include "config.h" + #include "system.h" + #include "coretypes.h" +-#include "tm.h" + #include "diagnostic-core.h" + + #include "loongarch-def.h" +@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3. 
If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + +-/* loongarch_isa_base_features defined here instead of loongarch-def.c +- because we need to use options.h. Pay attention on the order of elements +- in the initializer becaue ISO C++ does not allow C99 designated +- initializers! */ +- +-#define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) +- +-int64_t loongarch_isa_base_featuresN_ISA_BASE_TYPES = { +- /* ISA_BASE_LA64V100 = */ 0, +- /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES, +-}; + + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cacheN_CPUCFG_WORDS = { 0 }; +@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Use the native value anyways. */ + preset.simd = tmp; + ++ ++ int64_t hw_isa_evolution = 0; ++ + /* Features added during ISA evolution. */ + for (const auto &entry: cpucfg_map) + if (cpucfg_cacheentry.cpucfg_word & entry.cpucfg_bit) +- preset.evolution |= entry.isa_evolution_bit; ++ hw_isa_evolution |= entry.isa_evolution_bit; + + if (native_cpu_type != CPU_NATIVE) + { + /* Check if the local CPU really supports the features of the base + ISA of probed native_cpu_type. If any feature is not detected, + either GCC or the hardware is buggy. */ +- auto base_isa_feature = loongarch_isa_base_featurespreset.base; +- if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) + warning (0, + "detected base architecture %qs, but some of its " + "features are not detected; the detected base " +@@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + "features will be enabled", + loongarch_isa_base_stringspreset.base); + } ++ preset.evolution = hw_isa_evolution; + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6990c86c2..bc6997e45 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ + #include "loongarch-def.h" + #include "loongarch-str.h" + +@@ -51,9 +56,11 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ (ISA_BASE_LA64V110) ++ .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64) +- .simd_ (ISA_EXT_SIMD_LASX)); ++ .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + + static inline loongarch_cache la464_cache () + { +@@ -136,8 +143,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) +- .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 5ac70dfdd..f8f36f0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3. If not see + /* enum isa_base */ + + /* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-/* LoongArch V1.10. */ +-#define ISA_BASE_LA64V110 1 +-#define N_ISA_BASE_TYPES 2 ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +-/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is +- we cannot use the C++ header options.h in loongarch-def.c. */ +-extern int64_t loongarch_isa_base_features; +-#endif +- + /* enum isa_ext_* */ + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 45fc521e4..d31becc67 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc
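A compressed sketch of the evolution-bit accumulation this patch introduces (illustrative; the map entries and feature values below are made up, while the real table lives in the generated loongarch-cpucfg-map.h):

#include <cstdint>
#include <cstdio>

struct entry { int cpucfg_word; uint32_t cpucfg_bit; int64_t isa_evolution_bit; };

int
main ()
{
  /* Pretend cpucfg word 2 reports bits 26 and 27 set.  */
  uint32_t cpucfg_cache[3] = { 0, 0, (1u << 26) | (1u << 27) };
  const entry cpucfg_map[] = {
    { 2, 1u << 26, 1 << 0 },   /* hypothetical: div32  */
    { 2, 1u << 27, 1 << 1 },   /* hypothetical: lam-bh */
  };

  int64_t hw_isa_evolution = 0;
  for (const auto &e : cpucfg_map)
    if (cpucfg_cache[e.cpucfg_word] & e.cpucfg_bit)
      hw_isa_evolution |= e.isa_evolution_bit;

  printf ("evolution mask: 0x%llx\n", (unsigned long long) hw_isa_evolution);
  return 0;
}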
_service:tar_scm:0058-LoongArch-Add-support-for-xorsign.patch
Added
@@ -0,0 +1,412 @@ +From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 17 Nov 2023 17:00:21 +0800 +Subject: PATCH 058/188 LoongArch: Add support for xorsign. + +This patch adds support for xorsign pattern to scalar fp and vector. With the +new expands, uniformly using vector bitwise logical operations to handle xorsign. + +On LoongArch64, floating-point registers and vector registers share the same register, +so this patch also allows conversion between LSX vector mode and scalar fp mode to +avoid unnecessary instruction generation. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xorsign<mode>3): New expander. + * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow + conversion between LSX vector mode and scalar fp mode. + * config/loongarch/loongarch.md (@xorsign<mode>3): New expander. + * config/loongarch/lsx.md (@xorsign<mode>3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. + * gcc.target/loongarch/xorsign-run.c: New test. + * gcc.target/loongarch/xorsign.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 +++++-- + gcc/config/loongarch/loongarch.cc | 5 ++ + gcc/config/loongarch/loongarch.md | 17 ++++++ + gcc/config/loongarch/lsx.md | 23 +++++-- + .../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++ + .../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++ + .../gcc.target/loongarch/xorsign-run.c | 25 ++++++++ + gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++ + 10 files changed, 260 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 116b30c07..de7c88f14 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1065,10 +1065,10 @@ + (set_attr "mode" "<MODE>")) + + (define_insn "xor<mode>3" +- (set (match_operand:ILASX 0 "register_operand" "=f,f,f") +- (xor:ILASX +- (match_operand:ILASX 1 "register_operand" "f,f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) ++ (set (match_operand:LASX 0 "register_operand" "=f,f,f") ++ (xor:LASX ++ (match_operand:LASX 1 "register_operand" "f,f,f") ++ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) + "ISA_HAS_LASX" + "@ + xvxor.v\t%u0,%u1,%u2 +@@ -3061,6 +3061,20 @@ + operands5 = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "xorsign<mode>3" ++ (set (match_dup 4) ++ (and:FLASX (match_dup 3) ++ (match_operand:FLASX 2 "register_operand"))) ++ (set (match_operand:FLASX 0 "register_operand") ++ (xor:FLASX (match_dup 4) ++ (match_operand:FLASX 1 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ operands3 = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands4 = 
gen_reg_rtx (<MODE>mode); ++}) ++ + + (define_insn "absv4df2" + (set (match_operand:V4DF 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3ef7e3605..3c8ae9a42 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + ++ /* Allow conversion between LSX vector mode and scalar fp mode. */ ++ if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to)) ++ || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)))) ++ return true; ++ + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index cfd7a8ec6..afc3c591f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1164,6 +1164,23 @@ + "fcopysign.<fmt>\t%0,%1,%2" + (set_attr "type" "fcopysign") + (set_attr "mode" "<UNITMODE>")) ++ ++(define_expand "@xorsign<mode>3" ++ (match_operand:ANYF 0 "register_operand") ++ (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand") ++ "ISA_HAS_LSX" ++{ ++ machine_mode lsx_mode ++ = <MODE>mode == SFmode ? V4SFmode : V2DFmode; ++ rtx tmp = gen_reg_rtx (lsx_mode); ++ rtx op1 = lowpart_subreg (lsx_mode, operands1, <MODE>mode); ++ rtx op2 = lowpart_subreg (lsx_mode, operands2, <MODE>mode); ++ emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); ++ emit_move_insn (operands0, ++ lowpart_subreg (<MODE>mode, tmp, lsx_mode)); ++ DONE; ++}) +  + ;; + ;; .................... +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 232399934..ce6ec6d69 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -957,10 +957,10 @@ + (set_attr "mode" "<MODE>")) + + (define_insn "xor<mode>3" +- (set (match_operand:ILSX 0 "register_operand" "=f,f,f") +- (xor:ILSX +- (match_operand:ILSX 1 "register_operand" "f,f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) ++ (set (match_operand:LSX 0 "register_operand" "=f,f,f") ++ (xor:LSX ++ (match_operand:LSX 1 "register_operand" "f,f,f") ++ (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8"))) + "ISA_HAS_LSX" + "@ + vxor.v\t%w0,%w1,%w2 +@@ -2786,6 +2786,21 @@ + operands5 = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "@xorsign<mode>3" ++ (set (match_dup 4) ++ (and:FLSX (match_dup 3) ++ (match_operand:FLSX 2 "register_operand"))) ++ (set (match_operand:FLSX 0 "register_operand") ++ (xor:FLSX (match_dup 4) ++ (match_operand:FLSX 1 "register_operand"))) ++ "ISA_HAS_LSX" ++{ ++ operands3 = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands4 = gen_reg_rtx (<MODE>mode); ++}) ++ ++ + (define_insn "absv2df2" + (set (match_operand:V2DF 0 "register_operand" "=f") + (abs:V2DF (match_operand:V2DF 1 "register_operand" "f"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +new file mode 100644 +index 000000000..2295503d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ ++/* { dg-require-effective-target loongarch_asx_hw } */ ++ ++#include "lasx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float aN = {-0.1f, -3.2f, -6.3f, 
-9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float bN = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float rN; ++ ++double adN = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d,
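For reference, the scalar identity the new xorsign expanders implement, as a hedged sketch (the patch itself operates on vector registers, with the sign-bit mask built by loongarch_build_signbit_mask):

#include <cstdint>
#include <cstring>
#include <cstdio>

static float
xorsign_ref (float x, float y)
{
  uint32_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux ^= uy & 0x80000000u;      /* flip x's sign by y's sign bit */
  memcpy (&x, &ux, sizeof x);
  return x;
}

int
main ()
{
  /* prints -2.500000 2.500000 */
  printf ("%f %f\n", xorsign_ref (2.5f, -1.0f), xorsign_ref (-2.5f, -1.0f));
  return 0;
}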
_service:tar_scm:0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Added
@@ -0,0 +1,730 @@ +From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:49 +0800 +Subject: PATCH 059/188 LoongArch: Add support for LoongArch V1.1 approximate + instructions. + +This patch adds define_insn/builtins/intrinsics for these instructions, and add option +-mfrecipe to control instruction generation. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in (fecipe): Add. + * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + * config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern. + (lasx_xvfrsqrte_<flasxfmt>): Ditto. + * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. + (__lasx_xvfrecipe_d): Ditto. + (__lasx_xvfrsqrte_s): Ditto. + (__lasx_xvfrsqrte_d): Ditto. + * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. + (LSX_EXT_BUILTIN): New macro. + (LASX_EXT_BUILTIN): Ditto. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". + * config/loongarch/loongarch-def.cc: Regenerate. + * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. + * config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern. + (loongarch_frsqrte_<fmt>): Ditto. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern. + (lsx_vfrsqrte_<flsxfmt>): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. + (__lsx_vfrecipe_d): Ditto. + (__lsx_vfrsqrte_s): Ditto. + (__lsx_vfrsqrte_d): Ditto. + * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. 
+--- + gcc/config/loongarch/genopts/isa-evolution.in | 1 + + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ + gcc/config/loongarch/lasx.md | 24 +++++++++++ + gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ + gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ + gcc/config/loongarch/loongarch-c.cc | 3 ++ + gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + + gcc/config/loongarch/loongarch-def.cc | 3 +- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++ + gcc/config/loongarch/lsx.md | 24 +++++++++++ + gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ + gcc/doc/extend.texi | 35 ++++++++++++++++ + .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ + .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ + .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ + 18 files changed, 365 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index a6bc3f87f..11a198b64 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,3 +1,4 @@ ++2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. + 2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. + 2 28 lamcas Support amcas_db.{b/h/w/d} instructions. +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 2833f1487..22035e767 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + } + #endif + ++#ifdef __loongarch_frecipe ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_s (float _1) ++{ ++ __builtin_loongarch_frecipe_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_d (double _1) ++{ ++ __builtin_loongarch_frecipe_d ((double) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_s (float _1) ++{ ++ __builtin_loongarch_frsqrte_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_d (double _1) ++{ ++ __builtin_loongarch_frsqrte_d ((double) _1); ++} ++#endif ++ + /* Assembly instruction format: ui15. */ + /* Data types in instruction templates: USI. 
*/ + #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index de7c88f14..b1416f6c3 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -40,8 +40,10 @@ + UNSPEC_LASX_XVFCVTL + UNSPEC_LASX_XVFLOGB + UNSPEC_LASX_XVFRECIP ++ UNSPEC_LASX_XVFRECIPE + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT ++ UNSPEC_LASX_XVFRSQRTE + UNSPEC_LASX_XVFCMP_SAF + UNSPEC_LASX_XVFCMP_SEQ + UNSPEC_LASX_XVFCMP_SLE +@@ -1633,6 +1635,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lasx_xvfrecipe_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRECIPE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrecipe.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvfrsqrt_<flasxfmt>" + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +@@ -1642,6 +1655,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Square Root Instructions. ++ ++(define_insn "lasx_xvfrsqrte_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRTE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrsqrte.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>" + (set (match_operand:<VIMODE256> 0 "register_operand" "=f") + (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 7bce2c757..5e65e76e7 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) + return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrecipe_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
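A usage sketch of the new scalar estimate intrinsics, guarded by the feature macro this patch adds (assumption: a LoongArch toolchain with this patch applied, built with -mfrecipe; the void signatures follow the larchintrin.h hunk above):

#include <larchintrin.h>

void
prime_estimates (float s, double d)
{
#ifdef __loongarch_frecipe
  __frecipe_s (s);   /* frecipe.s: approximate 1/s       */
  __frsqrte_d (d);   /* frsqrte.d: approximate 1/sqrt(d) */
#endif
}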
_service:tar_scm:0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
Added
@@ -0,0 +1,257 @@ +From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:50 +0800 +Subject: PATCH 060/188 LoongArch: Use standard pattern name for + xvfrsqrt/vfrsqrt instructions. + +Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with standard +pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrsqrt_<flasxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. + (CODE_FOR_lsx_vfrsqrt_s): Ditto. + (CODE_FOR_lasx_xvfrsqrt_d): Ditto. + (CODE_FOR_lasx_xvfrsqrt_s): Ditto. + * config/loongarch/loongarch.cc (use_rsqrt_p): New function. + (loongarch_optab_supported_p): Ditto. + (TARGET_OPTAB_SUPPORTED_P): New hook. + * config/loongarch/loongarch.md (*rsqrt<mode>a): Remove. + (*rsqrt<mode>2): New insn pattern. + (*rsqrt<mode>b): Remove. + * config/loongarch/lsx.md (lsx_vfrsqrt_<flsxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. +--- + gcc/config/loongarch/lasx.md | 6 ++--- + gcc/config/loongarch/loongarch-builtins.cc | 4 +++ + gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++ + gcc/config/loongarch/loongarch.md | 24 +++++------------ + gcc/config/loongarch/lsx.md | 6 ++--- + .../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++ + .../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++ + 7 files changed, 96 insertions(+), 23 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b1416f6c3..3a4a1fe51 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1646,10 +1646,10 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrsqrt_<flasxfmt>" ++(define_insn "rsqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRSQRT)) ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) + "ISA_HAS_LASX" + "xvfrsqrt.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index bc156bd36..4aae27a5e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h + #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d ++#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 ++#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu + #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du ++#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 
++#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ce1c0a8bd..95aa9453b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++static bool ++use_rsqrt_p (void) ++{ ++ return (flag_finite_math_only ++ && !flag_trapping_math ++ && flag_unsafe_math_optimizations); ++} ++ ++/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ ++ ++static bool ++loongarch_optab_supported_p (int op, machine_mode, machine_mode, ++ optimization_type opt_type) ++{ ++ switch (op) ++ { ++ case rsqrt_optab: ++ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); ++ ++ default: ++ return true; ++ } ++} ++ + /* If -fverbose-asm, dump some info for debugging. */ + static void + loongarch_asm_code_end (void) +@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9080cec1c..4dfe583e2 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -60,6 +60,7 @@ + UNSPEC_TIE + + ;; RSQRT ++ UNSPEC_RSQRT + UNSPEC_RSQRTE + + ;; RECIP +@@ -1134,25 +1135,14 @@ + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")) + +-(define_insn "*rsqrt<mode>a" ++(define_insn "*rsqrt<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" +- (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) +- +-(define_insn "*rsqrt<mode>b" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_RSQRT)) ++ "TARGET_HARD_FLOAT" ++ "frsqrt.<fmt>\t%0,%1" + (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) ++ (set_attr "mode" "<UNITMODE>")) + + ;; Approximate Reciprocal Square Root Instructions. 
+ +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 37bdc6910..cb4a448e7 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1559,10 +1559,10 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vfrsqrt_<flsxfmt>" ++(define_insn "rsqrt<mode>2" + (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRSQRT)) ++ (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") ++ UNSPEC_LSX_VFRSQRT)) + "ISA_HAS_LSX" + "vfrsqrt.<flsxfmt>\t%w0,%w1" + (set_attr "type" "simd_fdiv") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +new file mode 100644 +index 000000000..24316944d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffast-math" } */ ++/* { dg-final { scan-assembler "xvfrsqrt.s" } } */
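A sketch mirroring the new lasx-rsqrt.c test (illustrative): with -O2 -mlasx -ffast-math, use_rsqrt_p () holds and a loop like the one below should vectorize to xvfrsqrt.s instead of a square root followed by a division:

extern float a[1024], b[1024];

void
rsqrt_loop (void)
{
  /* 1/sqrt(x) is the shape the rsqrt<mode>2 patterns serve.  */
  for (int i = 0; i < 1024; i++)
    b[i] = 1.0f / __builtin_sqrtf (a[i]);
}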
_service:tar_scm:0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
Added
@@ -0,0 +1,135 @@ +From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:51 +0800 +Subject: PATCH 061/188 LoongArch: Redefine pattern for xvfrecip/vfrecip + instructions. + +Redefine pattern for xvfrecip instructions use rtx code instead of unspec, and enable +xvfrecip instructions to be generated during auto-vectorization. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrecip_<flasxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine + to new pattern name. + (CODE_FOR_lsx_vfrecip_s): Ditto. + (CODE_FOR_lasx_xvfrecip_d): Ditto. + (CODE_FOR_lasx_xvfrecip_s): Ditto. + (loongarch_expand_builtin_direct): For the vector recip instructions, construct a + temporary parameter const1_vector. + * config/loongarch/lsx.md (lsx_vfrecip_<flsxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/predicates.md (const_vector_1_operand): New predicate. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++ + gcc/config/loongarch/lsx.md | 8 ++++---- + gcc/config/loongarch/predicates.md | 4 ++++ + 4 files changed, 32 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3a4a1fe51..ad49a3ffb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1626,12 +1626,12 @@ + (set_attr "type" "simd_fminmax") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrecip_<flasxfmt>" ++(define_insn "recip<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRECIP)) ++ (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "") ++ (match_operand:FLASX 2 "register_operand" "f"))) + "ISA_HAS_LASX" +- "xvfrecip.<flasxfmt>\t%u0,%u1" ++ "xvfrecip.<flasxfmt>\t%u0,%u2" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 4aae27a5e..85849ed29 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d + #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 + #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 ++#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3 ++#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du + #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 + #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 ++#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3 ++#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, + if (has_target_p) + create_output_operand (&opsopno++, target, TYPE_MODE (TREE_TYPE (exp))); + ++ /* For the vector reciprocal instructions, we need to construct a temporary ++ parameter const1_vector. 
*/ ++ switch (icode) ++ { ++ case CODE_FOR_recipv8sf3: ++ case CODE_FOR_recipv4df3: ++ case CODE_FOR_recipv4sf3: ++ case CODE_FOR_recipv2df3: ++ loongarch_prepare_builtin_arg (&ops2, exp, 0); ++ create_input_operand (&ops1, CONST1_RTX (ops0.mode), ops0.mode); ++ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); ++ ++ default: ++ break; ++ } ++ + /* Map the arguments to the other operands. */ + gcc_assert (opno + call_expr_nargs (exp) + == insn_dataicode.n_generator_args); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index cb4a448e7..f2774f021 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1539,12 +1539,12 @@ + (set_attr "type" "simd_fminmax") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vfrecip_<flsxfmt>" ++(define_insn "recip<mode>3" + (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRECIP)) ++ (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "") ++ (match_operand:FLSX 2 "register_operand" "f"))) + "ISA_HAS_LSX" +- "vfrecip.<flsxfmt>\t%w0,%w1" ++ "vfrecip.<flsxfmt>\t%w0,%w2" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 30a0dee9f..572550dbc 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -227,6 +227,10 @@ + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (match_test "op == CONST1_RTX (GET_MODE (op))"))) + ++(define_predicate "const_vector_1_operand" ++ (and (match_code "const_vector") ++ (match_test "op == CONST1_RTX (GET_MODE (op))"))) ++ + (define_predicate "reg_or_1_operand" + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) +-- +2.43.0 +
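A sketch of a loop the redefined pattern can serve (illustrative): with -O2 -mlsx -ffast-math as an assumed option set, the (div 1 x) form below can be emitted as vfrecip.s rather than vfdiv.s:

extern float a[1024], r[1024];

void
recip_loop (void)
{
  for (int i = 0; i < 1024; i++)
    r[i] = 1.0f / a[i];   /* (div 1 x) form matched by recip<mode>3 */
}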
_service:tar_scm:0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
Added
@@ -0,0 +1,1096 @@ +From faac4efbee23e60691fc086a78284225ecf824a8 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:52 +0800 +Subject: PATCH 062/188 LoongArch: New options -mrecip and -mrecip= with + ffast-math. + +When both the -mrecip and -mfrecipe options are enabled, use approximate reciprocal +instructions and approximate reciprocal square root instructions with additional +Newton-Raphson steps to implement single precision floating-point division, square +root and reciprocal square root operations, for a better performance. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lasx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set + recip_mask for -mrecip and -mrecip= options. + (loongarch_emit_swrsqrtsf): New function. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT + RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT + RECIP_MASK_ALL): New bitmasks. + (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV + TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. + * config/loongarch/loongarch.md (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch.opt (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lsx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. + * doc/invoke.texi (LoongArch Options): Document new options. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/divf.c: New test. + * gcc.target/loongarch/recip-divf.c: New test. + * gcc.target/loongarch/recip-sqrtf.c: New test. + * gcc.target/loongarch/sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 11 + + gcc/config/loongarch/lasx.md | 53 ++++- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch.cc | 188 ++++++++++++++++++ + gcc/config/loongarch/loongarch.h | 18 ++ + gcc/config/loongarch/loongarch.md | 49 ++++- + gcc/config/loongarch/loongarch.opt | 11 + + gcc/config/loongarch/lsx.md | 53 ++++- + gcc/config/loongarch/predicates.md | 4 + + gcc/doc/invoke.texi | 55 ++++- + gcc/testsuite/gcc.target/loongarch/divf.c | 10 + + .../gcc.target/loongarch/recip-divf.c | 9 + + .../gcc.target/loongarch/recip-sqrtf.c | 23 +++ + gcc/testsuite/gcc.target/loongarch/sqrtf.c | 24 +++ + .../loongarch/vector/lasx/lasx-divf.c | 13 ++ + .../loongarch/vector/lasx/lasx-recip-divf.c | 12 ++ + .../loongarch/vector/lasx/lasx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lasx/lasx-recip.c | 24 +++ + .../loongarch/vector/lasx/lasx-sqrtf.c | 29 +++ + .../loongarch/vector/lsx/lsx-divf.c | 13 ++ + .../loongarch/vector/lsx/lsx-recip-divf.c | 12 ++ + .../loongarch/vector/lsx/lsx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lsx/lsx-recip.c | 24 +++ + .../loongarch/vector/lsx/lsx-sqrtf.c | 29 +++ + 24 files changed, 711 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index cd5e75e4f..102202b03 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -23,6 +23,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -194,6 +197,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ad49a3ffb..eeac8cd98 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1194,7 +1194,25 @@ + (set_attr "type" "simd_fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "div<mode>3" ++(define_expand "div<mode>3" ++ (set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLASX 2 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands0, operands1, ++ operands2, V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f"))) +@@ -1223,7 +1241,23 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "sqrt<mode>2" ++(define_expand "sqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f"))) + "ISA_HAS_LASX" +@@ -1646,7 +1680,20 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "rsqrt<mode>2" ++(define_expand "rsqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) ++ "ISA_HAS_LASX" ++ { ++ if (<MODE>mode == V8SFmode && TARGET_RECIP_VEC_RSQRT) ++ {
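For intuition, the Newton-Raphson refinement that -mrecip pairs with the estimate instructions, written out in scalar form (a sketch only; the compiler emits the vector equivalent, and the precision remark is an assumption about the hardware estimate):

/* e0 is a hardware estimate (frecipe.s / frsqrte.s); each Newton step
   roughly doubles the number of correct bits.  */
static inline float
refine_recip (float x, float e0)
{
  return e0 * (2.0f - x * e0);             /* Newton step for 1/x */
}

static inline float
refine_rsqrt (float x, float e0)
{
  return e0 * (1.5f - 0.5f * x * e0 * e0); /* Newton step for 1/sqrt(x) */
}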
_service:tar_scm:0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
Added
@@ -0,0 +1,83 @@ +From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:53 +0800 +Subject: PATCH 063/188 LoongArch: Vectorized loop unrolling is disable for + divf/sqrtf/rsqrtf when -mrecip is enabled. + +Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number +of generated instructions is close to or exceeds the maximum issue instructions per cycle of the +LoongArch, so vectorized loop unrolling is not performed on them. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): + If m_has_recip is true, uf return 1. + (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. +--- + gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++-- + 1 file changed, 34 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 18326ce47..d64777179 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3970,7 +3970,9 @@ protected: + /* Reduction factor for suggesting unroll factor. */ + unsigned m_reduc_factor = 0; + /* True if the loop contains an average operation. */ +- bool m_has_avg =false; ++ bool m_has_avg = false; ++ /* True if the loop uses approximation instruction sequence. */ ++ bool m_has_recip = false; + }; + + /* Implement TARGET_VECTORIZE_CREATE_COSTS. */ +@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + { + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + +- if (m_has_avg) ++ if (m_has_avg || m_has_recip) + return 1; + + /* Don't unroll if it's specified explicitly not to be unrolled. */ +@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + } + } + ++ combined_fn cfn; ++ if (kind == vector_stmt ++ && stmt_info ++ && stmt_info->stmt) ++ { ++ /* Detect the use of approximate instruction sequence. */ ++ if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT) ++ && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) ++ switch (cfn) ++ { ++ case CFN_BUILT_IN_SQRTF: ++ m_has_recip = true; ++ default: ++ break; ++ } ++ else if (TARGET_RECIP_VEC_DIV ++ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ machine_mode mode = TYPE_MODE (vectype); ++ switch (gimple_assign_rhs_code (stmt_info->stmt)) ++ { ++ case RDIV_EXPR: ++ if (GET_MODE_INNER (mode) == SFmode) ++ m_has_recip = true; ++ default: ++ break; ++ } ++ } ++ } ++ + return retval; + } + +-- +2.43.0 +
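The unrolling decision above reduces to a short predicate; a condensed stub (illustrative, not the GCC hook signature):

unsigned
suggested_uf (bool m_has_avg, bool m_has_recip, unsigned uf)
{
  /* The expanded estimate+refine sequence already fills the issue
     width, so unrolling such loops buys nothing.  */
  if (m_has_avg || m_has_recip)
    return 1;
  return uf;
}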
_service:tar_scm:0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Added
@@ -0,0 +1,130 @@ +From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 25 Oct 2024 02:45:35 +0000 +Subject: PATCH 064/188 LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests + fail on LA664 PR112611 + +For xvshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. + +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. +--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 14 +++++++------- + .../gcc.target/loongarch/vector/lsx/lsx-vshuf.c | 12 ++++++------ + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index b8ab38711..910d29339 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -99,9 +99,9 @@ main () + *((unsigned long *)&__m256i_op12) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op11) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op10) = 0x7ff0000000000000; +- *((unsigned long *)&__m256i_op23) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op23) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op22) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op21) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op21) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +@@ -200,7 +200,7 @@ main () + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +@@ -351,7 +351,7 @@ main () + *((unsigned long *)&__m256i_op21) = 0x0000000000000001; + *((unsigned long *)&__m256i_op20) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result3) = 0x0000000000000001; +- *((unsigned long *)&__m256i_result2) = 0x0000000200000000; ++ *((unsigned long *)&__m256i_result2) = 0x0000000000000000; + *((unsigned long *)&__m256i_result1) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); +@@ -426,10 +426,10 @@ main () + *((unsigned long *)&__m256i_op22) = 0x8000000080000000; + *((unsigned long *)&__m256i_op21) = 0xdfffffffdfffffff; + *((unsigned long *)&__m256i_op20) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result3) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result3) = 0xdfffffff80000000; + *((unsigned long *)&__m256i_result2) = 0x7fc00000dfffffff; +- *((unsigned long *)&__m256i_result1) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result0) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result1) = 
0x7fc0000000000000; ++ *((unsigned long *)&__m256i_result0) = 0x8000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index f3b800f88..93a3078fa 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -33,7 +33,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x0000000000000000; + *((unsigned long *)&__m128i_op20) = 0x3f2f1f0f00000000; + *((unsigned long *)&__m128i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result0) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result0) = 0x00ff00ff00000000; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -153,7 +153,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x000000002bfd9461; + *((unsigned long *)&__m128i_op21) = 0x00007fff00007fff; + *((unsigned long *)&__m128i_op20) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0x00007fff00000000; + *((unsigned long *)&__m128i_result0) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -198,7 +198,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x00000000000000c0; + *((unsigned long *)&__m128i_op20) = 0x00000001ffffff29; + *((unsigned long *)&__m128i_result1) = 0xffffff29ffffff29; +- *((unsigned long *)&__m128i_result0) = 0x0000000100000001; ++ *((unsigned long *)&__m128i_result0) = 0xffffff2900000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -219,7 +219,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0x0000000020000020; + *((unsigned long *)&__m128i_op20) = 0x0000000020000020; +- *((unsigned long *)&__m128i_result1) = 0x2000002000000000; ++ *((unsigned long *)&__m128i_result1) = 0x0000000000000000; + *((unsigned long *)&__m128i_result0) = 0x2000002020000020; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -241,7 +241,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000001000000010; + *((unsigned long *)&__m128i_op21) = 0x8000000100000000; + *((unsigned long *)&__m128i_op20) = 0x8000000000000103; +- *((unsigned long *)&__m128i_result1) = 0x0000010300000103; ++ *((unsigned long *)&__m128i_result1) = 0x8000000000000103; + *((unsigned long *)&__m128i_result0) = 0x0000010380000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -252,7 +252,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m128i_result0) = 0xffffffffffffffff; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +-- +2.43.0 +
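A small sketch of the portability rule behind the changed test constants (hypothetical helper, not from the patch): for xvshuf/vshuf, selector index values above 63 are undefined on LA464 even though LA664 happens to tolerate them, so portable selectors keep each index in range:

#include <cstdint>

static inline uint8_t
shuf_index (uint8_t idx)
{
  return idx & 63;   /* keep selector indices in the defined 0..63 range */
}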
_service:tar_scm:0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
Added
@@ -0,0 +1,318 @@ +From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 29 Nov 2023 11:18:00 +0800 +Subject: PATCH 065/188 LoongArch: Fix ICE and use simplify_gen_subreg + instead of gen_rtx_SUBREG directly. + +loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported +in gcc, it causes an ICE: + +ice.c:55:1: error: unrecognizable insn: + 55 | } + | ^ +(insn 63 62 64 8 (set (reg:V4DI 278) + (subreg:V4DI (subreg:V4DF (reg:V4DI 273 vect__53.26 ) 0) 0)) -1 + (nil)) +during RTL pass: vregs +ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804 + +Last time, Ruoyao has fixed a similar ICE: +https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html + +This patch fixes ICE and use simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible +to avoid the same ice happening again. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use + simplify_gen_subreg instead of gen_rtx_SUBREG. + (loongarch_expand_vec_perm_const_2): Ditto. + (loongarch_expand_vec_cond_expr): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr112476-3.c: New test. + * gcc.target/loongarch/pr112476-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 79 +++++++++++-------- + .../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++ + .../gcc.target/loongarch/pr112476-4.c | 4 + + 3 files changed, 108 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d64777179..4a3a7a246 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + if (d->vmode == E_V2DFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else if (d->vmode == E_V4SFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else +@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + /* Adjust op1 for selecting correct value in high 128bit of target + register. + op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x21))); + +@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + + /* Generate subreg for fitting into insn gen function. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + + /* Adjust op value in temp register. 
+ op0 = {0,1,2,3}, op1 = {4,5,0,1} */ +@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x02))); +@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + /* We can achieve the expectation by using sinple xvpermi.q insn. */ + emit_move_insn (conv_target, conv_op1); +@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + /* Adjust op value in temp regiter. 
+ op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x13))); +@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx temp_reg = gen_reg_rtx (d->vmode); +- rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, ++ d->vmode, 0); + + emit_move_insn (temp_reg, d->op0); + +@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); + + /* Duplicate op0's low 128bit in op0, then duplicate high 128bit + in op1. 
After this, xvshuf.* insn's selector argument can +@@ -9978,10 +9984,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + + /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure + that selector's low 128bit can access all op0's elements, and +@@ -10101,12 +10109,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + { + case E_V4DFmode: + sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break; + case E_V8SFmode: + sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break;
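The ICE arises from user code that views the same vector through both floating-point and integer modes, which made the expander wrap one subreg inside another. A hypothetical reduced reproducer in that spirit (the bundled pr112476-3.c may differ; compile with -mlasx):

    typedef double v4df __attribute__ ((vector_size (32)));
    typedef long long v4di __attribute__ ((vector_size (32)));

    /* Element-wise select between double vectors with an integer mask;
       expanding this kind of mode-punned condition produced the
       subreg-of-subreg RTL shown in the commit message.  */
    v4df
    select_mask (v4df a, v4df b, v4di m)
    {
      return (v4df) (((v4di) a & m) | ((v4di) b & ~m));
    }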
_service:tar_scm:0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
Added
@@ -0,0 +1,236 @@ +From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:01:18 +0800 +Subject: PATCH 066/188 LoongArch: Fix eh_return epilogue for normal returns. + +On LoongArch, the regitsters $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved +and restored in the function prologue and epilogue if the given function calls +__builtin_eh_return. This causes the return value to be overwritten on normal +return paths and breaks a rare case of libgcc's _Unwind_RaiseException. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: Do not restore the saved eh_return + data registers ($r4-$r7) for a normal return of a function that calls + __builtin_eh_return elsewhere. + * config/loongarch/loongarch-protos.h: Same. + * config/loongarch/loongarch.md: Same. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/eh_return-normal-return.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 34 ++++++++++++----- + gcc/config/loongarch/loongarch.md | 23 ++++++++++- + .../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++ + 4 files changed, 84 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 117669e9f..e5fcf3111 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -60,7 +60,7 @@ enum loongarch_symbol_type { + extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); +-extern void loongarch_expand_epilogue (bool); ++extern void loongarch_expand_epilogue (int); + extern bool loongarch_can_use_return_insn (void); +  + extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4a3a7a246..7caf04d8d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, + + static void + loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, +- loongarch_save_restore_fn fn) ++ loongarch_save_restore_fn fn, ++ bool skip_eh_data_regs_p) + { + HOST_WIDE_INT offset; + +@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +- if (!cfun->machine->reg_is_wrapped_separatelyregno) ++ /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO) ++ when returning normally from a function that calls ++ __builtin_eh_return. In this case, these registers are saved but ++ should not be restored, or the return value may be clobbered. 
*/ ++ ++ if (!(cfun->machine->reg_is_wrapped_separatelyregno ++ || (skip_eh_data_regs_p ++ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))) + loongarch_save_restore_reg (word_mode, regno, offset, fn); + + offset -= UNITS_PER_WORD; +@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void) + GEN_INT (-step1)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + size -= step1; +- loongarch_for_each_saved_reg (size, loongarch_save_reg); ++ loongarch_for_each_saved_reg (size, loongarch_save_reg, false); + } + + /* Set up the frame pointer, if we're using one. */ +@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void) + return reload_completed && cfun->machine->frame.total_size == 0; + } + +-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P +- says which. */ ++/* Expand function epilogue using the following insn patterns: ++ "epilogue" (style == NORMAL_RETURN) ++ "sibcall_epilogue" (style == SIBCALL_RETURN) ++ "eh_return" (style == EXCEPTION_RETURN) */ + + void +-loongarch_expand_epilogue (bool sibcall_p) ++loongarch_expand_epilogue (int style) + { + /* Split the frame into two. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we +@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p) + bool need_barrier_p + = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; + +- if (!sibcall_p && loongarch_can_use_return_insn ()) ++ /* Handle simple returns. */ ++ if (style == NORMAL_RETURN && loongarch_can_use_return_insn ()) + { + emit_jump_insn (gen_return ()); + return; +@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p) + + /* Restore the registers. */ + loongarch_for_each_saved_reg (frame->total_size - step2, +- loongarch_restore_reg); ++ loongarch_restore_reg, ++ crtl->calls_eh_return ++ && style != EXCEPTION_RETURN); + + if (need_barrier_p) + loongarch_emit_stack_tie (); +@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p) + } + + /* Add in the __builtin_eh_return stack adjustment. */ +- if (crtl->calls_eh_return) ++ if (crtl->calls_eh_return && style == EXCEPTION_RETURN) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + +- if (!sibcall_p) ++ /* Emit return unless doing sibcall. */ ++ if (style != SIBCALL_RETURN) + emit_jump_insn (gen_simple_return_internal (ra)); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c6edd1dda..222f1ae83 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -125,6 +125,11 @@ + (T1_REGNUM 13) + (S0_REGNUM 23) + ++ ;; Return path styles ++ (NORMAL_RETURN 0) ++ (SIBCALL_RETURN 1) ++ (EXCEPTION_RETURN 2) ++ + ;; PIC long branch sequences are never longer than 100 bytes. 
+ (MAX_PIC_BRANCH_LENGTH 100) + ) +@@ -3276,7 +3281,7 @@ + (const_int 2) + "" + { +- loongarch_expand_epilogue (false); ++ loongarch_expand_epilogue (NORMAL_RETURN); + DONE; + }) + +@@ -3284,7 +3289,7 @@ + (const_int 2) + "" + { +- loongarch_expand_epilogue (true); ++ loongarch_expand_epilogue (SIBCALL_RETURN); + DONE; + }) + +@@ -3341,6 +3346,20 @@ + emit_insn (gen_eh_set_ra_di (operands0)); + else + emit_insn (gen_eh_set_ra_si (operands0)); ++ ++ emit_jump_insn (gen_eh_return_internal ()); ++ emit_barrier (); ++ DONE; ++}) ++ ++(define_insn_and_split "eh_return_internal" ++ (eh_return) ++ "" ++ "#" ++ "epilogue_completed" ++ (const_int 0) ++{ ++ loongarch_expand_epilogue (EXCEPTION_RETURN); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +new file mode 100644 +index 000000000..f8f3965f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++#include <stdlib.h>
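The bundled test is cut off by the diff truncation, but the failure mode is easy to sketch: a function that calls __builtin_eh_return on one path must still deliver its return value in $r4 on the normal path, so the epilogue must not unconditionally restore the saved EH data registers. An illustrative shape (an assumption of mine, not the actual test body):

    extern void *handler;

    /* On the normal path the return value lives in $r4 ($a0); blindly
       restoring the saved eh_return data registers would clobber it.  */
    int __attribute__ ((noinline))
    maybe_unwind (int normal)
    {
      if (!normal)
        __builtin_eh_return (0L, handler);
      return 42;   /* must survive the epilogue */
    }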
_service:tar_scm:0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
Added
@@ -0,0 +1,180 @@ +From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 7 Dec 2023 15:45:30 +0800 +Subject: PATCH 067/188 LoongArch: Allow -mcmodel=extreme and model attribute + with -mexplicit-relocs=auto + +There seems no real reason to require -mexplicit-relocs=always for +-mcmodel=extreme or model attribute. As the linker does not know how to +relax a 3-operand la.local or la.global pseudo instruction, just emit +explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also +SYMBOL_GOT_DISP. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP + if TARGET_CMODEL_EXTREME. + (loongarch_split_symbol): Check for la_opt_explicit_relocs != + EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. + (loongarch_print_operand_reloc): Likewise. + (loongarch_option_override_internal): Likewise. + (loongarch_handle_model_attribute): Likewise. + * doc/invoke.texi (-mcmodel=extreme): Update the compatibility + between it and -mexplicit-relocs=. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-3.c: New test. + * gcc.target/loongarch/attr-model-4.c: New test. + * gcc.target/loongarch/func-call-extreme-3.c: New test. + * gcc.target/loongarch/func-call-extreme-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 25 ++++++++++++------- + gcc/doc/invoke.texi | 4 +-- + .../gcc.target/loongarch/attr-model-3.c | 6 +++++ + .../gcc.target/loongarch/attr-model-4.c | 6 +++++ + .../loongarch/func-call-extreme-3.c | 7 ++++++ + .../loongarch/func-call-extreme-4.c | 7 ++++++ + 6 files changed, 44 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7caf04d8d..4362149ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + case SYMBOL_TLS_LE: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- /* The linker don't know how to relax TLS accesses. */ ++ case SYMBOL_PCREL64: ++ /* The linker don't know how to relax TLS accesses or 64-bit ++ pc-relative accesses. */ + return true; + case SYMBOL_GOT_DISP: ++ /* The linker don't know how to relax GOT accesses in extreme ++ code model. 
*/ ++ if (TARGET_CMODEL_EXTREME) ++ return true; ++ + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + temp1 = gen_reg_rtx (Pmode); + emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + loongarch_classify_symbolic_expression (op); + + if (loongarch_symbol_extreme_p (symbol_type)) +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + switch (symbol_type) + { +@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (!TARGET_EXPLICIT_RELOCS) +- error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs=always"); ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-mexplicit-relocs=none"); + + if (opts->x_flag_plt) + { +@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (!TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, +- "-mexplicit-relocs=always"); ++ "%qE attribute is not compatible with %s", name, ++ "-mexplicit-relocs=none"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 76a8f20d1..5c6515cb1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space. + + @item extreme + This mode does not limit the size of the code segment and data segment. +-The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and +-@option{-mno-explicit-relocs}. ++The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} ++and/or @option{-mexplicit-relocs=none}. + @end table + The default code model is @code{normal}. 
+ +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +new file mode 100644 +index 000000000..5622d5086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +new file mode 100644 +index 000000000..482724bb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +new file mode 100644 +index 000000000..a4da44b4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +new file mode 100644 +index 000000000..16b00f4c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +-- +2.43.0 +
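With the restriction relaxed, code like the following now builds under -mexplicit-relocs=auto instead of demanding -mexplicit-relocs=always; a small sketch of my own, separate from the bundled attr-model tests:

    /* Compile with: -O2 -mexplicit-relocs=auto -mcmodel=normal  */
    int far_counter __attribute__ ((model ("extreme")));

    int
    bump (void)
    {
      /* The per-symbol "extreme" model forces the full 64-bit
         %pc_hi20/%pc_lo12/%pc64_lo20/%pc64_hi12 sequence, which is now
         emitted as explicit relocs even under -mexplicit-relocs=auto.  */
      return ++far_counter;
    }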
_service:tar_scm:0068-LoongArch-Fix-warnings-building-libgcc.patch
Added
@@ -0,0 +1,79 @@ +From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 22:08:37 +0800 +Subject: PATCH 068/188 LoongArch: Fix warnings building libgcc + +We are excluding loongarch-opts.h from target libraries, but now struct +loongarch_target and gcc_options are not declared in the target +libraries, causing: + +In file included from ../.././gcc/options.h:8, + from ../.././gcc/tm.h:49, + from ../../../gcc/libgcc/fixed-bit.c:48: +../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41: +warning: 'struct gcc_options' declared inside parameter list will not +be visible outside of this definition or declaration + 57 | struct gcc_options *opts, + | ^~~~~~~~~~~ + +So exclude the declarations referring to the C++ structs as well. + +gcc/ChangeLog: + + * config/loongarch/loongarch-opts.h (la_target): Move into #if + for loongarch-def.h. + (loongarch_init_target): Likewise. + (loongarch_config_target): Likewise. + (loongarch_update_gcc_opt_status): Likewise. +--- + gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 7010ddfec..639ed50bd 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + +-/* This is a C++ header and it shouldn't be used by target libraries. */ ++/* The loongarch-def.h file is a C++ header and it shouldn't be used by ++ target libraries. Exclude it and everything using the C++ structs ++ (struct loongarch_target and gcc_options) from target libraries. */ + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" +-#endif + + /* Target configuration */ + extern struct loongarch_target la_target; + +-/* Flag status */ +-struct loongarch_flags { +- int flt; const char* flt_str; +-#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) +- int sx2; +-}; +- +- + /* Initialize loongarch_target from separate option variables. */ + void + loongarch_init_target (struct loongarch_target *target, +@@ -56,7 +49,14 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++#endif + ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx2; ++}; + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ + #define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) +-- +2.43.0 +
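The shape of the fix generalizes to any mixed C/C++ header: declarations that mention C++-only types must sit under the same guard that hides the C++ header, while plain-C pieces stay visible to target libraries. An illustrative header skeleton (names are made up, not GCC's):

    /* hypothetical-opts.h -- same guard pattern as loongarch-opts.h */
    #if !defined (IN_LIBGCC2) && !defined (IN_TARGET_LIBS) && !defined (IN_RTS)
    #include "cxx-defs.h"                     /* C++-only definitions */
    void init_target (struct gcc_options *);  /* refers to a C++ struct */
    #endif

    /* Plain-C declarations remain visible when building libgcc etc.  */
    struct plain_flags { int flt; const char *flt_str; };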
_service:tar_scm:0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
Added
@@ -0,0 +1,30 @@ +From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 13 Dec 2023 02:39:35 +0800 +Subject: PATCH 069/188 LoongArch: testsuite: Remove XFAIL in + vect-ftint-no-inexact.c + +After r14-6455 this no longer fails. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove. +--- + gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +index 83d268099..61918beef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -39,6 +39,5 @@ + /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ + /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ + +-/* trunc: XFAIL due to PR 107723 */ +-/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ + /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 +
_service:tar_scm:0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
Added
@@ -0,0 +1,44 @@ +From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 15:27:28 +0800 +Subject: PATCH 070/188 LoongArch: Include rtl.h for COSTS_N_INSNS instead of + hard coding our own + +With loongarch-def.cc switched from C to C++, we can include rtl.h for +COSTS_N_INSNS, instead of hard coding our own. + +This is a non-functional change for now, but it will make the code more +future-proof in case COSTS_N_INSNS in rtl.h is ever changed. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.cc (rtl.h): Include. + (COSTS_N_INSNS): Remove the macro definition. +--- + gcc/config/loongarch/loongarch-def.cc | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index c41804a18..6217b1926 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + #include "system.h" + #include "coretypes.h" + #include "tm.h" ++#include "rtl.h" + + #include "loongarch-def.h" + #include "loongarch-str.h" +@@ -89,8 +90,6 @@ array_tune<loongarch_align> loongarch_cpu_align = + .set (CPU_LA464, la464_align ()) + .set (CPU_LA664, la464_align ()); + +-#define COSTS_N_INSNS(N) ((N) * 4) + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () + : fp_add (COSTS_N_INSNS (1)), +-- +2.43.0 +
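For reference, the macro being de-duplicated just scales an instruction count into GCC's cost units; a trivial standalone sketch of the arithmetic (the authoritative definition lives in rtl.h):

    #include <stdio.h>

    /* Same formula as the removed local copy; keeping a single
       definition in rtl.h means the backend follows along if the
       scale factor ever changes.  */
    #define COSTS_N_INSNS(n) ((n) * 4)

    int
    main (void)
    {
      printf ("%d %d\n", COSTS_N_INSNS (1), COSTS_N_INSNS (5));  /* 4 20 */
      return 0;
    }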
_service:tar_scm:0071-LoongArch-Fix-instruction-costs-PR112936.patch
Added
@@ -0,0 +1,165 @@ +From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 17:41:32 +0800 +Subject: PATCH 071/188 LoongArch: Fix instruction costs PR112936 + +Replace the instruction costs in loongarch_rtx_cost_data constructor +based on micro-benchmark results on LA464 and LA664. + +This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl +and slli. + +gcc/ChangeLog: + + PR target/112936 + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update + instruction costs per micro-benchmark results. + (loongarch_rtx_cost_optimize_size): Set all instruction costs + to (COSTS_N_INSNS (1) + 1). + * config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove + special case for multiplication when optimizing for size. + Adjust division cost when TARGET_64BIT && !TARGET_DIV32. + Account the extra cost when TARGET_CHECK_ZERO_DIV and + optimizing for speed. + +gcc/testsuite/ChangeLog + + PR target/112936 + * gcc.target/loongarch/mul-const-reduction.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++--------- + gcc/config/loongarch/loongarch.cc | 22 +++++------ + .../loongarch/mul-const-reduction.c | 11 ++++++ + 3 files changed, 43 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6217b1926..4a8885e83 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align = + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +- : fp_add (COSTS_N_INSNS (1)), +- fp_mult_sf (COSTS_N_INSNS (2)), +- fp_mult_df (COSTS_N_INSNS (4)), +- fp_div_sf (COSTS_N_INSNS (6)), ++ : fp_add (COSTS_N_INSNS (5)), ++ fp_mult_sf (COSTS_N_INSNS (5)), ++ fp_mult_df (COSTS_N_INSNS (5)), ++ fp_div_sf (COSTS_N_INSNS (8)), + fp_div_df (COSTS_N_INSNS (8)), +- int_mult_si (COSTS_N_INSNS (1)), +- int_mult_di (COSTS_N_INSNS (1)), +- int_div_si (COSTS_N_INSNS (4)), +- int_div_di (COSTS_N_INSNS (6)), ++ int_mult_si (COSTS_N_INSNS (4)), ++ int_mult_di (COSTS_N_INSNS (4)), ++ int_div_si (COSTS_N_INSNS (5)), ++ int_div_di (COSTS_N_INSNS (5)), + branch_cost (6), + memory_latency (4) {} + +@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = + array_tune<loongarch_rtx_cost_data> (); + +-/* RTX costs to use when optimizing for size. */ ++/* RTX costs to use when optimizing for size. ++ We use a value slightly larger than COSTS_N_INSNS (1) for all of them ++ because they are slower than simple instructions. 
*/ ++#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1) + const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + loongarch_rtx_cost_data () +- .fp_add_ (4) +- .fp_mult_sf_ (4) +- .fp_mult_df_ (4) +- .fp_div_sf_ (4) +- .fp_div_df_ (4) +- .int_mult_si_ (4) +- .int_mult_di_ (4) +- .int_div_si_ (4) +- .int_div_di_ (4); ++ .fp_add_ (COST_COMPLEX_INSN) ++ .fp_mult_sf_ (COST_COMPLEX_INSN) ++ .fp_mult_df_ (COST_COMPLEX_INSN) ++ .fp_div_sf_ (COST_COMPLEX_INSN) ++ .fp_div_df_ (COST_COMPLEX_INSN) ++ .int_mult_si_ (COST_COMPLEX_INSN) ++ .int_mult_di_ (COST_COMPLEX_INSN) ++ .int_div_si_ (COST_COMPLEX_INSN) ++ .int_div_di_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4362149ef..afbb55390 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +- else if (!speed) +- *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else +@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case UDIV: + case UMOD: +- if (!speed) +- { +- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); +- } +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_div_di; + else +- *total = loongarch_cost->int_div_si; ++ { ++ *total = loongarch_cost->int_div_si; ++ if (TARGET_64BIT && !TARGET_DIV32) ++ *total += COSTS_N_INSNS (2); ++ } ++ ++ if (TARGET_CHECK_ZERO_DIV) ++ *total += COSTS_N_INSNS (2); ++ + return false; + + case SIGN_EXTEND: +@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { +- if (!speed) +- *total = COSTS_N_INSNS (1) + 1; +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; +diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +new file mode 100644 +index 000000000..02d9a4876 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464" } */ ++/* { dg-final { scan-assembler "alsl\.w" } } */ ++/* { dg-final { scan-assembler "slli\.w" } } */ ++/* { dg-final { scan-assembler-not "mul\.w" } } */ ++ ++int ++test (int a) ++{ ++ return a * 68; ++} +-- +2.43.0 +
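The mul-const-reduction.c test checks the resulting code; the arithmetic behind it: 68 = 17 * 4 = (1 + 2^4) * 2^2, so with realistic multiply costs the multiplication decomposes into one alsl.w plus one slli.w. A portable C rendering of the same strength reduction:

    int
    mul68 (int x)
    {
      int t = x + (x << 4);   /* x * 17  ->  alsl.w t, x, x, 4 */
      return t << 2;          /* x * 68  ->  slli.w            */
    }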
_service:tar_scm:0072-LoongArch-Add-alslsi3_extend.patch
Added
@@ -0,0 +1,53 @@ +From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 18:02:35 +0800 +Subject: PATCH 072/188 LoongArch: Add alslsi3_extend + +Following the instruction cost fix, we are generating + + alsl.w $a0, $a0, $a0, 4 + +instead of + + li.w $t0, 17 + mul.w $a0, $a0, $t0 + +for "x * 17", because alsl.w is 4 times faster than mul.w. But we didn't +have a sign-extending pattern for alsl.w, causing an extra slli.w +instruction to be generated to sign-extend $a0. Add the pattern to remove the +redundant extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (alslsi3_extend): New + define_insn. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 222f1ae83..23368008e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2874,6 +2874,18 @@ + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) + ++(define_insn "alslsi3_extend" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (plus:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand:SI 3 "register_operand" "r")))) ++ "" ++ "alsl.w\t%0,%1,%3,%2" ++ (set_attr "type" "arith") ++ (set_attr "mode" "SI")) ++ +  + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. +-- +2.43.0 +
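A user-level shape that exercises the new pattern on LP64 (a sketch of mine, not a bundled test): the 32-bit product is consumed in a 64-bit context, so it must arrive sign-extended, which alsl.w can now express on its own:

    long
    mul17_widened (int x)
    {
      /* Previously alsl.w plus a separate slli.w emitted only to
         sign-extend $a0; now a single alsl.w suffices.  */
      return x * 17;
    }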
_service:tar_scm:0073-LoongArch-Add-support-for-D-frontend.patch
Added
@@ -0,0 +1,224 @@ +From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001 +From: liushuyu <liushuyu011@gmail.com> +Date: Mon, 18 Dec 2023 09:52:07 +0800 +Subject: PATCH 073/188 LoongArch: Add support for D frontend. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-d.o to d_target_objs for LoongArch + architecture. + * config/loongarch/t-loongarch: Add object target for loongarch-d.cc. + * config/loongarch/loongarch-d.cc + (loongarch_d_target_versions): add interface function to define builtin + D versions for LoongArch architecture. + (loongarch_d_handle_target_float_abi): add interface function to define + builtin D traits for LoongArch architecture. + (loongarch_d_register_target_info): add interface function to register + loongarch_d_handle_target_float_abi function. + * config/loongarch/loongarch-d.h + (loongarch_d_target_versions): add function prototype. + (loongarch_d_register_target_info): Likewise. + +libphobos/ChangeLog: + + * configure.tgt: Enable libphobos for LoongArch architecture. + * libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for + LoongArch64. + * libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for + LoongArch64. +--- + gcc/config.gcc | 1 + + gcc/config/loongarch/loongarch-d.cc | 77 ++++++++++++++++++++++ + gcc/config/loongarch/loongarch-d.h | 26 ++++++++ + gcc/config/loongarch/t-loongarch | 4 ++ + libphobos/configure.tgt | 3 + + libphobos/libdruntime/gcc/sections/elf.d | 2 + + libphobos/libdruntime/gcc/unwind/generic.d | 1 + + 7 files changed, 114 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch-d.cc + create mode 100644 gcc/config/loongarch/loongarch-d.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 11ab620d0..039187fa2 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -456,6 +456,7 @@ mips*-*-*) + ;; + loongarch*-*-*) + cpu_type=loongarch ++ d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" + extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" +diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc +new file mode 100644 +index 000000000..9ac483c39 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.cc +@@ -0,0 +1,77 @@ ++/* Subroutines for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm_d.h" ++#include "d/d-target.h" ++#include "d/d-target-def.h" ++ ++/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets. 
*/ ++ ++void ++loongarch_d_target_versions (void) ++{ ++ if (TARGET_64BIT) ++ d_add_builtin_version ("LoongArch64"); ++ else ++ d_add_builtin_version ("LoongArch32"); ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_HardFloat"); ++ d_add_builtin_version ("D_HardFloat"); ++ } ++ else if (TARGET_SOFT_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_SoftFloat"); ++ d_add_builtin_version ("D_SoftFloat"); ++ } ++} ++ ++/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */ ++ ++static tree ++loongarch_d_handle_target_float_abi (void) ++{ ++ const char *abi; ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ abi = "hard"; ++ else if (TARGET_SOFT_FLOAT_ABI) ++ abi = "soft"; ++ else ++ abi = ""; ++ ++ return build_string_literal (strlen (abi) + 1, abi); ++} ++ ++/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. */ ++ ++void ++loongarch_d_register_target_info (void) ++{ ++ const struct d_target_info_spec handlers = { ++ {"floatAbi", loongarch_d_handle_target_float_abi}, ++ {NULL, NULL}, ++ }; ++ ++ d_add_target_info_handlers (handlers); ++} +diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h +new file mode 100644 +index 000000000..a2fb8d51d +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.h +@@ -0,0 +1,26 @@ ++/* Definitions for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* Defined in loongarch-d.cc */ ++extern void ++loongarch_d_target_versions (void); ++extern void ++loongarch_d_register_target_info (void); ++ ++/* Target hooks for D language. 
*/ ++#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index a1a40431f..994f4d19c 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + ++loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt +index 0063dd232..dcb1551cd 100644 +--- a/libphobos/configure.tgt ++++ b/libphobos/configure.tgt +@@ -36,6 +36,9 @@ case "${target}" in + hppa-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; ++ loongarch*-*-linux*) ++ LIBPHOBOS_SUPPORTED=yes ++ ;; + mips*-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; +diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d +index 5819811f3..bc993ea49 100644 +--- a/libphobos/libdruntime/gcc/sections/elf.d
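With this in place, D code can branch on the new predefined identifiers -- version (LoongArch64), version (LoongArch_HardFloat) / version (LoongArch_SoftFloat) alongside D_HardFloat / D_SoftFloat -- and can query __traits(getTargetInfo, "floatAbi"), which the handler above answers with "hard" or "soft".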
_service:tar_scm:0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
Added
@@ -0,0 +1,156 @@ +From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:09:41 +0800 +Subject: PATCH 074/188 libruntime: Add fiber context switch code for + LoongArch. + +libphobos/ChangeLog: + + * libdruntime/config/loongarch/switchcontext.S: New file. +--- + .../config/loongarch/switchcontext.S | 133 ++++++++++++++++++ + 1 file changed, 133 insertions(+) + create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S + +diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S +new file mode 100644 +index 000000000..edfb9b67e +--- /dev/null ++++ b/libphobos/libdruntime/config/loongarch/switchcontext.S +@@ -0,0 +1,133 @@ ++/* LoongArch support code for fibers and multithreading. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "../common/threadasm.S" ++ ++/** ++ * Performs a context switch. ++ * ++ * $a0 - void** - ptr to old stack pointer ++ * $a1 - void* - new stack pointer ++ * ++ */ ++ ++#if defined(__loongarch_lp64) ++# define GPR_L ld.d ++# define GPR_S st.d ++# define SZ_GPR 8 ++# define ADDSP(si) addi.d $sp, $sp, si ++#elif defined(__loongarch64_ilp32) ++# define GPR_L ld.w ++# define GPR_S st.w ++# define SZ_GPR 4 ++# define ADDSP(si) addi.w $sp, $sp, si ++#else ++# error Unsupported GPR size (must be 64-bit or 32-bit). ++#endif ++ ++#if defined(__loongarch_double_float) ++# define FPR_L fld.d ++# define FPR_S fst.d ++# define SZ_FPR 8 ++#elif defined(__loongarch_single_float) ++# define FPR_L fld.s ++# define FPR_S fst.s ++# define SZ_FPR 4 ++#else ++# define SZ_FPR 0 ++#endif ++ ++ .text ++ .align 2 ++ .global fiber_switchContext ++ .type fiber_switchContext, @function ++fiber_switchContext: ++ .cfi_startproc ++ ADDSP(-11 * SZ_GPR) ++ ++ // fp regs and return address are stored below the stack ++ // because we don't want the GC to scan them. 
++ ++ // return address (r1) ++ GPR_S $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ // callee-saved scratch FPRs (f24-f31) ++ FPR_S $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_S $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_S $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_S $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_S $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_S $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_S $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_S $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ // callee-saved GPRs (r21, fp (r22), r23-r31) ++ GPR_S $r21, $sp, 0*SZ_GPR ++ GPR_S $fp, $sp, 1*SZ_GPR ++ GPR_S $s0, $sp, 2*SZ_GPR ++ GPR_S $s1, $sp, 3*SZ_GPR ++ GPR_S $s2, $sp, 4*SZ_GPR ++ GPR_S $s3, $sp, 5*SZ_GPR ++ GPR_S $s4, $sp, 6*SZ_GPR ++ GPR_S $s5, $sp, 7*SZ_GPR ++ GPR_S $s6, $sp, 8*SZ_GPR ++ GPR_S $s7, $sp, 9*SZ_GPR ++ GPR_S $s8, $sp, 10*SZ_GPR ++ ++ // swap stack pointer ++ GPR_S $sp, $a0, 0 ++ move $sp, $a1 ++ ++ GPR_L $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ FPR_L $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_L $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_L $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_L $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_L $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_L $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_L $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_L $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ GPR_L $r21, $sp, 0*SZ_GPR ++ GPR_L $fp, $sp, 1*SZ_GPR ++ GPR_L $s0, $sp, 2*SZ_GPR ++ GPR_L $s1, $sp, 3*SZ_GPR ++ GPR_L $s2, $sp, 4*SZ_GPR ++ GPR_L $s3, $sp, 5*SZ_GPR ++ GPR_L $s4, $sp, 6*SZ_GPR ++ GPR_L $s5, $sp, 7*SZ_GPR ++ GPR_L $s6, $sp, 8*SZ_GPR ++ GPR_L $s7, $sp, 9*SZ_GPR ++ GPR_L $s8, $sp, 10*SZ_GPR ++ ++ ADDSP(11 * SZ_GPR) ++ ++ jr $r1 // return ++ .cfi_endproc ++ .size fiber_switchContext,.-fiber_switchContext +-- +2.43.0 +
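A hedged sketch of the C-level contract the D runtime relies on (the prototype is inferred from the comment block in the file; preparing a fiber's initial stack is runtime-specific and elided here):

    /* a0 = where to store the old stack pointer, a1 = stack to resume.  */
    extern void fiber_switchContext (void **oldp, void *newp);

    static void *scheduler_sp;
    static void *fiber_sp;   /* assumed: points at a prepared fiber stack */

    static void
    resume_fiber (void)
    {
      /* Saves callee-saved state on the current stack, stores the old
         SP to scheduler_sp, then resumes from fiber_sp.  */
      fiber_switchContext (&scheduler_sp, fiber_sp);
    }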
_service:tar_scm:0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
Added
@@ -0,0 +1,866 @@ +From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 01:09:20 +0800 +Subject: PATCH 075/188 LoongArch: Fix FP vector comparsons PR113034 + +We had the following mappings between <x>vfcmp submenmonics and RTX +codes: + + (define_code_attr fcc + (unordered "cun") + (ordered "cor") + (eq "ceq") + (ne "cne") + (uneq "cueq") + (unle "cule") + (unlt "cult") + (le "cle") + (lt "clt")) + +This is inconsistent with scalar code: + + (define_code_attr fcond (unordered "cun") + (uneq "cueq") + (unlt "cult") + (unle "cule") + (eq "ceq") + (lt "slt") + (le "sle") + (ordered "cor") + (ltgt "sne") + (ne "cune") + (ge "sge") + (gt "sgt") + (unge "cuge") + (ungt "cugt")) + +For every RTX code for which the LSX/LASX code is different from the +scalar code, the scalar code is correct and the LSX/LASX code is wrong. +Most seriously, the RTX code NE should be mapped to "cneq", not "cne". +Rewrite <x>vfcmp define_insns in simd.md using the same mapping as +scalar fcmp. + +Note that GAS does not support xvfcmp.{c/s}u{ge/gt} (pseudo) +instruction (although fcmp.{c/s}u{ge/gt} is supported), so we need to +switch the order of inputs and use xvfcmp.{c/s}u{le/lt} instead. + +The <x>vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single +RTX code, but they can be modeled as an inversed RTX code following a +"not" operation. Doing so allows the compiler to optimized vectorized +__builtin_isless etc. to a single instruction. This optimization should +be added for scalar code too and I'll do it later. + +Tests are added for mapping between C code, IEC 60559 operations, and +vfcmp instructions. + +1:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html + +gcc/ChangeLog: + + PR target/113034 + * config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove. + (lasx_xvfcmp_caf_<flasxfmt>): Remove. + (lasx_xvfcmp_cune_<FLASX:flasxfmt>): Remove. + (FSC256_UNS): Remove. + (fsc256): Remove. + (lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>): Remove. + (lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>): Remove. + * config/loongarch/lsx.md (UNSPEC_LSX_XVFCMP_*): Remove. + (lsx_vfcmp_caf_<flsxfmt>): Remove. + (lsx_vfcmp_cune_<FLSX:flsxfmt>): Remove. + (vfcond): Remove. + (fcc): Remove. + (FSC_UNS): Remove. + (fsc): Remove. + (lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>): Remove. + (lsx_vfcmp_<fsc>_<FLSX:flsxfmt>): Remove. + * config/loongarch/simd.md + (fcond_simd): New define_code_iterator. + (<simd_isa>_<x>vfcmp_<fcond:fcond_simd>_<simdfmt>): + New define_insn. + (fcond_simd_rev): New define_code_iterator. + (fcond_rev_asm): New define_code_attr. + (<simd_isa>_<x>vfcmp_<fcond:fcond_simd_rev>_<simdfmt>): + New define_insn. + (fcond_inv): New define_code_iterator. + (fcond_inv_rev): New define_code_iterator. + (fcond_inv_rev_asm): New define_code_attr. + (<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>): New define_insn. + (<simd_isa>_<x>vfcmp_<fcond_inv:fcond_inv_rev>_<simdfmt>): + New define_insn. + (UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF, + UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN, + UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE, + UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs. + (SIMD_FCMP): New define_int_iterator. + (fcond_unspec): New define_int_attr. + (<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>): New define_insn. + * config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp): + Remove unneeded special cases. + +gcc/testsuite/ChangeLog: + + PR target/113034 + * gcc.target/loongarch/vfcmp-f.c: New test. 
+ * gcc.target/loongarch/vfcmp-d.c: New test. + * gcc.target/loongarch/xvfcmp-f.c: New test. + * gcc.target/loongarch/xvfcmp-d.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune + instead of cne. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise. +--- + gcc/config/loongarch/lasx.md | 76 -------- + gcc/config/loongarch/loongarch.cc | 60 +----- + gcc/config/loongarch/lsx.md | 83 -------- + gcc/config/loongarch/simd.md | 118 ++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 4 +- + .../loongarch/vector/lsx/lsx-vcond-2.c | 4 +- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 28 +++ + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 178 ++++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 29 +++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 27 +++ + 10 files changed, 385 insertions(+), 222 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index eeac8cd98..921ce0eeb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -32,9 +32,7 @@ + UNSPEC_LASX_XVBITREVI + UNSPEC_LASX_XVBITSET + UNSPEC_LASX_XVBITSETI +- UNSPEC_LASX_XVFCMP_CAF + UNSPEC_LASX_XVFCLASS +- UNSPEC_LASX_XVFCMP_CUNE + UNSPEC_LASX_XVFCVT + UNSPEC_LASX_XVFCVTH + UNSPEC_LASX_XVFCVTL +@@ -44,17 +42,6 @@ + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE +- UNSPEC_LASX_XVFCMP_SAF +- UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT +- UNSPEC_LASX_XVFCMP_SNE +- UNSPEC_LASX_XVFCMP_SOR +- UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT +- UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SUNE + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -1481,69 +1468,6 @@ + (set_attr "type" "simd_fclass") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfcmp_caf_<flasxfmt>" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f") +- UNSPEC_LASX_XVFCMP_CAF)) +- "ISA_HAS_LASX" +- "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_fcmp") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f") +- UNSPEC_LASX_XVFCMP_CUNE)) +- "ISA_HAS_LASX" +- "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2" +- (set_attr "type" "simd_fcmp") +- (set_attr "mode" "<MODE>")) +- +- +- +-(define_int_iterator FSC256_UNS UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT) +- +-(define_int_attr fsc256 +- (UNSPEC_LASX_XVFCMP_SAF "saf") +- (UNSPEC_LASX_XVFCMP_SUN "sun") +- (UNSPEC_LASX_XVFCMP_SOR "sor") +- (UNSPEC_LASX_XVFCMP_SEQ "seq") +- (UNSPEC_LASX_XVFCMP_SNE "sne") +- (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
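The most consequential of the wrong mappings is NE: in C, x != y is true when either operand is NaN (the unordered case), which is exactly cune, "compare unordered or not-equal"; cne is the ordered not-equal that corresponds to LTGT / islessgreater. A scalar illustration of the distinction:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double n = NAN;
      /* != must be true on unordered operands: cune semantics.  */
      printf ("NAN != NAN    -> %d\n", n != n);                /* 1 */
      /* islessgreater is the ordered not-equal: cne/sne semantics.  */
      printf ("islessgreater -> %d\n", islessgreater (n, n));  /* 0 */
      return 0;
    }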
_service:tar_scm:0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
Added
@@ -0,0 +1,190 @@ +From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 19 Dec 2023 04:48:03 +0800 +Subject: PATCH 076/188 LoongArch: Use force_reg instead of gen_reg_rtx + + emit_move_insn in vec_init expander PR113033 + +Jakub says: + + Then that seems like a bug in the loongarch vec_init pattern(s). + Those really don't have a predicate in any of the backends on the + input operand, so they need to force_reg it if it is something it + can't handle. I've looked e.g. at i386 vec_init and that is exactly + what it does, see the various tests + force_reg calls in + ix86_expand_vector_init*. + +So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033. + +gcc/ChangeLog: + + PR target/113033 + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Replace gen_reg_rtx + + emit_move_insn with force_reg. + (loongarch_expand_vector_init): Likewise. + +gcc/testsuite/ChangeLog: + + PR target/113033 + * gcc.target/loongarch/pr113033.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 38 ++++++------------- + gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++ + 2 files changed, 35 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index a22601d88..000d2d623 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + gcc_unreachable (); + } + } +- temp = gen_reg_rtx (imode); ++ + if (imode == GET_MODE (same)) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + else + temp2 = lowpart_subreg (imode, same, GET_MODE (same)); + } +- emit_move_insn (temp, temp2); ++ ++ temp = force_reg (imode, temp2); + + switch (vmode) + { +@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals) + to reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi0); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val_hi1); ++ op0 = force_reg (imode, val_hi0); ++ op1 = force_reg (imode, val_hi1); + emit_insn ( + loongarch_vec_repl2_256 (target_hi, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hii); ++ op0 = force_reg (imode, val_hii); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } + } + else + { ++ op0 = force_reg (imode, val_hii); + /* Assign the lowest element of val_hi to all elements + of target_hi. */ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi0); + emit_insn (loongarch_vec_repl1_256 (target_hi, op0)); + } + else if (!rtx_equal_p (val_hii, val_hi0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hii); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } +@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + if (!lo_same && !half_same) + { ++ op0 = force_reg (imode, val_loi); + /* Assign the lowest element of val_lo to all elements + of target_lo. 
*/ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_lo0); + emit_insn (loongarch_vec_repl1_128 (target_lo, op0)); + } + else if (!rtx_equal_p (val_loi, val_lo0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_loi); + emit_insn ( + loongarch_vec_set128 (target_lo, op0, GEN_INT (i))); + } +@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals) + reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val0); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val1); ++ op0 = force_reg (imode, val0); ++ op1 = force_reg (imode, val1); + emit_insn (loongarch_vec_repl2_128 (target, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, vali); ++ op0 = force_reg (imode, vali); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + loongarch_vec_mirror (target, target, const0_rtx)); + return; + } ++ op0 = force_reg (imode, vali); + /* Assign the lowest element of val to all elements of + target. */ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val0); + emit_insn (loongarch_vec_repl1_128 (target, op0)); + } + else if (!rtx_equal_p (vali, val0)) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, vali); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c +new file mode 100644 +index 000000000..4ccd037d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c +@@ -0,0 +1,23 @@ ++/* PR target/113033: ICE with vector left rotate */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++ ++typedef unsigned __attribute__ ((vector_size (16))) v4si; ++typedef unsigned __attribute__ ((vector_size (32))) v8si; ++typedef unsigned long long __attribute__ ((vector_size (16))) v2di; ++typedef unsigned long long __attribute__ ((vector_size (32))) v4di; ++ ++#define TEST(tp) \ ++extern tp data_##tp; \ ++tp \ ++test_##tp (int x) \ ++{ \ ++ const int bit = sizeof (data_##tp0) * __CHAR_BIT__; \ ++ data_##tp = data_##tp << (x & (bit - 1)) \ ++ | data_##tp >> (bit - x & (bit - 1)); \ ++} ++ ++TEST (v4si) ++TEST (v8si) ++TEST (v2di) ++TEST (v4di) +-- +2.43.0 +
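The core of the fix above is a one-call idiom. A hedged sketch of the transformation, using the real GCC internal APIs force_reg, gen_reg_rtx and emit_move_insn, but with a hypothetical helper name of mine:

/* Before: allocate a pseudo, then move VAL into it; this can ICE when
   VAL is an expression the move pattern cannot handle directly.  */
static rtx
legitimize_elt_old (machine_mode imode, rtx val)
{
  rtx tmp = gen_reg_rtx (imode);
  emit_move_insn (tmp, val);
  return tmp;
}

/* After: force_reg is a no-op for values already in a register and
   otherwise emits whatever sequence is needed to get VAL into one.  */
static rtx
legitimize_elt (machine_mode imode, rtx val)
{
  return force_reg (imode, val);
}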
View file
_service:tar_scm:0077-LoongArch-Clean-up-vec_init-expander.patch
Added
@@ -0,0 +1,83 @@ +From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 19 Dec 2023 05:02:42 +0800 +Subject: PATCH 077/188 LoongArch: Clean up vec_init expander + +Non functional change, clean up the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Remove "temp2" and reuse + "temp" instead. + (loongarch_expand_vector_init): Use gcc_unreachable () instead + of gcc_assert (0), and fix the comment for it. +--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 000d2d623..3aeafeafd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx temp; + + if (CONST_INT_P (same) && nvar == 0 + && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + } + + if (imode == GET_MODE (same)) +- temp2 = same; ++ temp = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { + if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); + } + else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); ++ temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } + else + { +@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ temp = lowpart_subreg (imode, same, GET_MODE (same)); + } + +- temp = force_reg (imode, temp2); ++ temp = force_reg (imode, temp); + + switch (vmode) + { +@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + return; + } + +- /* Loongson is the only cpu with vectors with more elements. */ +- gcc_assert (0); ++ /* No LoongArch CPU supports vectors with more elements as at now. */ ++ gcc_unreachable (); + } + + /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ +-- +2.43.0 +
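On the gcc_assert (0) to gcc_unreachable () change: both abort in checking builds, but gcc_unreachable () additionally tells the compiler the path is dead. Illustrative sketch only (gcc_unreachable is the GCC-internal macro, my example otherwise):

int
vector_half_elts (int nelts)
{
  switch (nelts)
    {
    case 16: return 8;
    case 32: return 16;
    default:
      /* Marks the path unreachable, so no "control reaches end of
	 non-void function" fallout and no dead error-path code.  */
      gcc_unreachable ();
    }
}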
View file
_service:tar_scm:0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
Added
@@ -0,0 +1,78 @@ +From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 14 Dec 2023 20:49:04 +0800 +Subject: [PATCH 078/188] LoongArch: Fix incorrect code generation for sad + pattern + +When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c +and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad +code. This patch to fixed them, for sad patterns, use zero expansion instead of sign +expansion for reduction. + +Currently, we are fixing failed vectorized tests, and in the future, we will +enable more tests of "vect" for LoongArch. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use zero expansion instruction. + * config/loongarch/lsx.md: Ditto. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/lsx.md | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 921ce0eeb..9ca3f9278 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5021,8 +5021,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -5038,8 +5038,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 57e0ee3d4..7f5fff40a 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -3385,8 +3385,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -3402,8 +3402,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +-- +2.43.0 +
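A worked example (mine, not from the patch) of why the widening steps of the SAD reduction must use the unsigned forms: the absolute difference of two unsigned bytes is already a non-negative 8-bit value, so sign-extending it corrupts the accumulation.

int
sad_term (unsigned char a, unsigned char b)
{
  unsigned char d = a > b ? a - b : b - a;  /* what [x]vabsd.bu computes */
  int zext = d;                   /* vhaddw.hu.bu view: 0xE0 -> 224 */
  int sext = (signed char) d;     /* vhaddw.h.b view:  0xE0 -> -32  */
  return zext - sext;             /* nonzero for d >= 0x80: wrong sums */
}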
View file
_service:tar_scm:0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
Added
@@ -0,0 +1,68 @@ +From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 13 Dec 2023 09:31:07 +0800 +Subject: [PATCH 079/188] LoongArch: Modify the check type of the vector + builtin function. + +On LoongArch architecture, using the latest gcc14 in regression test, +it is found that the vector test cases in vector directory appear FAIL +entries with unmatched pointer types. In order to solve this kind of +problem, the type of the variable in the check result is modified with +the parameter type defined in the vector builtin function. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/simd_correctness_check.h:The variable + types in the check results are modified in conjunction with the + parameter types defined in the vector builtin function. +--- + .../loongarch/vector/simd_correctness_check.h | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +index eb7fbd59c..551340bd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h ++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +@@ -8,11 +8,12 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- long *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ long long *temp_ref = (long long *)&ref[i], \ ++ *temp_res = (long long *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%lx, got: 0x%lx\n", \ ++ "%ld:0x%016lx, got: 0x%016lx\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -28,11 +29,11 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- int *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ int *temp_ref = (int *)&ref[i], *temp_res = (int *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%x, got: 0x%x\n", \ ++ "%ld:0x%08x, got: 0x%08x\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -47,8 +48,8 @@ + { \ + if (ref != res) \ + { \ +- printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \ +- line, ref, res); \ ++ printf (" error: %s at line %ld , expected 0x:%016x", \ ++ "got 0x:%016x\n", __FILE__, line, ref, res); \ + } \ + } \ + while (0) +-- +2.43.0 +
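The corrected macros compare lanes bit-for-bit through an integer view of matching width. A standalone sketch of the same pattern in portable C (assumed function and names of mine, not the header itself):

#include <stdio.h>
#include <string.h>

int
check_d (const double *ref, const double *res, int n)
{
  int fail = 0;
  for (int i = 0; i < n; i++)
    {
      long long r, s;                   /* 64 bits wide on any ABI    */
      memcpy (&r, &ref[i], sizeof r);   /* bit-exact, no FP compare   */
      memcpy (&s, &res[i], sizeof s);
      if (r != s)
	{
	  printf ("lane %d: expected 0x%016llx, got 0x%016llx\n", i, r, s);
	  fail = 1;
	}
    }
  return fail;
}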
View file
_service:tar_scm:0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
Added
@@ -0,0 +1,250 @@ +From 2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001 +From: Jiajie Chen <c@jia.je> +Date: Wed, 13 Dec 2023 23:26:01 +0800 +Subject: PATCH 080/188 LoongArch: extend.texi: Fix typos in LSX intrinsics + +Several typos have been found and fixed: missing semicolons, using +variable name instead of type, duplicate functions and wrong types. + +gcc/ChangeLog: + + * doc/extend.texi(__lsx_vabsd_di): remove extra `i' in name. + (__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d, + __lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d, + __lsx_vfrintrz_s): fix return types. + (__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d, + __lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d, + __lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d, + __lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q, + __lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q, + __lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q, + __lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q, + __lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon. + (__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return + type. + (__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h, + __lsx_vstelm_w): use imm type for the last argument. + (__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w, + __lsx_vsigncov_d): remove duplicate definitions. +--- + gcc/doc/extend.texi | 90 ++++++++++++++++++++++----------------------- + 1 file changed, 43 insertions(+), 47 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index bb042ae78..ac8da4e80 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i); + int __lsx_bz_w (__m128i); + __m128i __lsx_vabsd_b (__m128i, __m128i); + __m128i __lsx_vabsd_bu (__m128i, __m128i); +-__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_d (__m128i, __m128i); + __m128i __lsx_vabsd_du (__m128i, __m128i); + __m128i __lsx_vabsd_h (__m128i, __m128i); + __m128i __lsx_vabsd_hu (__m128i, __m128i); +@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128); + __m128d __lsx_vfrecip_d (__m128d); + __m128 __lsx_vfrecip_s (__m128); + __m128d __lsx_vfrint_d (__m128d); +-__m128i __lsx_vfrintrm_d (__m128d); +-__m128i __lsx_vfrintrm_s (__m128); +-__m128i __lsx_vfrintrne_d (__m128d); +-__m128i __lsx_vfrintrne_s (__m128); +-__m128i __lsx_vfrintrp_d (__m128d); +-__m128i __lsx_vfrintrp_s (__m128); +-__m128i __lsx_vfrintrz_d (__m128d); +-__m128i __lsx_vfrintrz_s (__m128); ++__m128d __lsx_vfrintrm_d (__m128d); ++__m128 __lsx_vfrintrm_s (__m128); ++__m128d __lsx_vfrintrne_d (__m128d); ++__m128 __lsx_vfrintrne_s (__m128); ++__m128d __lsx_vfrintrp_d (__m128d); ++__m128 __lsx_vfrintrp_s (__m128); ++__m128d __lsx_vfrintrz_d (__m128d); ++__m128 __lsx_vfrintrz_s (__m128); + __m128 __lsx_vfrint_s (__m128); + __m128d __lsx_vfrsqrt_d (__m128d); + __m128 __lsx_vfrsqrt_s (__m128); +@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); + __m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); + __m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7); + __m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); +-__m128i __lsx_vld (void *, imm_n2048_2047) +-__m128i __lsx_vldi (imm_n1024_1023) +-__m128i __lsx_vldrepl_b (void *, imm_n2048_2047) +-__m128i __lsx_vldrepl_d (void *, imm_n256_255) +-__m128i __lsx_vldrepl_h (void *, imm_n1024_1023) +-__m128i __lsx_vldrepl_w (void *, imm_n512_511) ++__m128i __lsx_vld (void *, imm_n2048_2047); 
++__m128i __lsx_vldi (imm_n1024_1023); ++__m128i __lsx_vldrepl_b (void *, imm_n2048_2047); ++__m128i __lsx_vldrepl_d (void *, imm_n256_255); ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023); ++__m128i __lsx_vldrepl_w (void *, imm_n512_511); + __m128i __lsx_vldx (void *, long int); + __m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); + __m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); +@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i); + __m128i __lsx_vmax_du (__m128i, __m128i); + __m128i __lsx_vmax_h (__m128i, __m128i); + __m128i __lsx_vmax_hu (__m128i, __m128i); +-__m128i __lsx_vmaxi_b (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_bu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_d (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_du (__m128i, imm0_31); +-__m128i __lsx_vmaxi_h (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_hu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_w (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_wu (__m128i, imm0_31); + __m128i __lsx_vmax_w (__m128i, __m128i); + __m128i __lsx_vmax_wu (__m128i, __m128i); +@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i); + __m128i __lsx_vmin_du (__m128i, __m128i); + __m128i __lsx_vmin_h (__m128i, __m128i); + __m128i __lsx_vmin_hu (__m128i, __m128i); +-__m128i __lsx_vmini_b (__m128i, imm_n16_15) ++__m128i __lsx_vmini_b (__m128i, imm_n16_15); + __m128i __lsx_vmini_bu (__m128i, imm0_31); +-__m128i __lsx_vmini_d (__m128i, imm_n16_15) ++__m128i __lsx_vmini_d (__m128i, imm_n16_15); + __m128i __lsx_vmini_du (__m128i, imm0_31); +-__m128i __lsx_vmini_h (__m128i, imm_n16_15) ++__m128i __lsx_vmini_h (__m128i, imm_n16_15); + __m128i __lsx_vmini_hu (__m128i, imm0_31); +-__m128i __lsx_vmini_w (__m128i, imm_n16_15) ++__m128i __lsx_vmini_w (__m128i, imm_n16_15); + __m128i __lsx_vmini_wu (__m128i, imm0_31); + __m128i __lsx_vmin_w (__m128i, __m128i); + __m128i __lsx_vmin_wu (__m128i, __m128i); +@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i); + __m128i __lsx_vpickod_h (__m128i, __m128i); + __m128i __lsx_vpickod_w (__m128i, __m128i); + int __lsx_vpickve2gr_b (__m128i, imm0_15); +-unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15); ++unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15); + long int __lsx_vpickve2gr_d (__m128i, imm0_1); + unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); + int __lsx_vpickve2gr_h (__m128i, imm0_7); +-unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); + int __lsx_vpickve2gr_w (__m128i, imm0_3); + unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); + __m128i __lsx_vreplgr2vr_b (int); +@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i); + __m128i __lsx_vsigncov_d (__m128i, __m128i); + __m128i __lsx_vsigncov_h (__m128i, __m128i); + __m128i __lsx_vsigncov_w (__m128i, __m128i); +-__m128i __lsx_vsigncov_b (__m128i, __m128i); +-__m128i __lsx_vsigncov_d (__m128i, __m128i); +-__m128i __lsx_vsigncov_h (__m128i, __m128i); +-__m128i __lsx_vsigncov_w (__m128i, __m128i); + __m128i __lsx_vsle_b (__m128i, __m128i); + __m128i __lsx_vsle_bu (__m128i, __m128i); + __m128i __lsx_vsle_d (__m128i, __m128i); +@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31); + __m128i __lsx_vsran_b_h (__m128i, __m128i); + __m128i __lsx_vsran_h_w (__m128i, __m128i); + __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrani_d_q 
(__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsran_w_d (__m128i, __m128i); +@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w (__m128i, imm0_31); + __m128i __lsx_vsrarn_b_h (__m128i, __m128i); + __m128i __lsx_vsrarn_h_w (__m128i, __m128i); + __m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrarn_w_d (__m128i, __m128i); +@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31); + __m128i __lsx_vsrln_b_h (__m128i, __m128i); + __m128i __lsx_vsrln_h_w (__m128i, __m128i); + __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrln_w_d (__m128i, __m128i); +@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31); + __m128i __lsx_vsrlrn_b_h (__m128i, __m128i); + __m128i __lsx_vsrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrlrn_w_d (__m128i, __m128i); +@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i); + __m128i __lsx_vssran_h_w (__m128i, __m128i); + __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); +@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrarn_h_w (__m128i, __m128i); + __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15);
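A usage sketch of one corrected prototype (mine; compile with -mlsx): the rounding intrinsics return floating-point vectors, so with the fixed documentation no cast through __m128i is implied.

#include <lsxintrin.h>

__m128d
round_down (__m128d x)
{
  return __lsx_vfrintrm_d (x);   /* round each lane toward -inf */
}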
View file
_service:tar_scm:0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
Added
@@ -0,0 +1,60 @@ +From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 19 Dec 2023 16:43:17 +0800 +Subject: PATCH 081/188 LoongArch: Fix builtin function prototypes for LASX + in doc. + +gcc/ChangeLog: + + * doc/extend.texi:According to the documents submitted earlier, + Two problems with function return types and using the actual types + of parameters instead of variable names were found and fixed. +--- + gcc/doc/extend.texi | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ac8da4e80..c793c9c5d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); + __m256d __lasx_xvfrecip_d (__m256d); + __m256 __lasx_xvfrecip_s (__m256); + __m256d __lasx_xvfrint_d (__m256d); +-__m256i __lasx_xvfrintrm_d (__m256d); +-__m256i __lasx_xvfrintrm_s (__m256); +-__m256i __lasx_xvfrintrne_d (__m256d); +-__m256i __lasx_xvfrintrne_s (__m256); +-__m256i __lasx_xvfrintrp_d (__m256d); +-__m256i __lasx_xvfrintrp_s (__m256); +-__m256i __lasx_xvfrintrz_d (__m256d); +-__m256i __lasx_xvfrintrz_s (__m256); ++__m256d __lasx_xvfrintrm_d (__m256d); ++__m256 __lasx_xvfrintrm_s (__m256); ++__m256d __lasx_xvfrintrne_d (__m256d); ++__m256 __lasx_xvfrintrne_s (__m256); ++__m256d __lasx_xvfrintrp_d (__m256d); ++__m256 __lasx_xvfrintrp_s (__m256); ++__m256d __lasx_xvfrintrz_d (__m256d); ++__m256 __lasx_xvfrintrz_s (__m256); + __m256 __lasx_xvfrint_s (__m256); + __m256d __lasx_xvfrsqrt_d (__m256d); + __m256 __lasx_xvfrsqrt_s (__m256); +@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i); + __m256i __lasx_xvssub_w (__m256i, __m256i); + __m256i __lasx_xvssub_wu (__m256i, __m256i); + void __lasx_xvst (__m256i, void *, imm_n2048_2047); +-void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7); + void __lasx_xvstx (__m256i, void *, long int); + __m256i __lasx_xvsub_b (__m256i, __m256i); + __m256i __lasx_xvsub_d (__m256i, __m256i); +-- +2.43.0 +
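A usage sketch for the corrected xvstelm prototypes (mine; compile with -mlasx): the last argument is an immediate lane index whose range depends on the element width (imm0_7 for the eight 32-bit lanes), not an arbitrary idx variable.

#include <lasxintrin.h>

void
store_lane_3 (__m256i v, int *p)
{
  __lasx_xvstelm_w (v, p, 0, 3);   /* store lane 3 at byte offset 0 */
}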
View file
_service:tar_scm:0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
Added
@@ -0,0 +1,92 @@ +From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 5 Dec 2023 14:44:35 +0800 +Subject: PATCH 082/188 LoongArch: Add asm modifiers to the LSX and LASX + directives in the doc. + +gcc/ChangeLog: + + * doc/extend.texi:Add modifiers to the vector of asm in the doc. + * doc/md.texi:Refine the description of the modifier 'f' in the doc. +--- + gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++ + gcc/doc/md.texi | 2 +- + 2 files changed, 47 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index c793c9c5d..bcb9329c2 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch + @item @code{d} @tab Same as @code{c}. + @item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register. + @item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}. ++@item @code{u} @tab Print a LASX register. ++@item @code{w} @tab Print a LSX register. + @item @code{X} @tab Print a constant integer operand in hexadecimal. + @item @code{z} @tab Print the operand in its unmodified form, followed by a comma. + @end multitable + ++References to input and output operands in the assembler template of extended ++asm statements can use modifiers to affect the way the operands are formatted ++in the code output to the assembler. For example, the following code uses the ++'w' modifier for LoongArch: ++ ++@example ++test-asm.c: ++ ++#include <lsxintrin.h> ++ ++__m128i foo (void) ++@{ ++__m128i a,b,c; ++__asm__ ("vadd.d %w0,%w1,%w2\n\t" ++ :"=f" (c) ++ :"f" (a),"f" (b)); ++ ++return c; ++@} ++ ++@end example ++ ++@noindent ++The compile command for the test case is as follows: ++ ++@example ++gcc test-asm.c -mlsx -S -o test-asm.s ++@end example ++ ++@noindent ++The assembly statement produces the following assembly code: ++ ++@example ++vadd.d $vr0,$vr0,$vr1 ++@end example ++ ++This is a 128-bit vector addition instruction, @code{c} (referred to in the ++template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are ++the inputs. @code{__m128i} is a vector data type defined in the file ++@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}). The symbol '=f' ++represents a constraint using a floating-point register as an output type, and ++the 'f' in the input operand represents a constraint using a floating-point ++register operand, which can refer to the definition of a constraint ++(@xref{Constraints}) in gcc. + + @lowersections + @include md.texi +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index b58da0787..a2e839073 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -2750,7 +2750,7 @@ $r1h + @item LoongArch---@file{config/loongarch/constraints.md} + @table @code + @item f +-A floating-point register (if available). ++A floating-point or vector register (if available). + @item k + A memory operand whose address is formed by a base register and + (optionally scaled) index register. +-- +2.43.0 +
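A companion sketch to the documentation example above, using the newly documented 'u' modifier so the 256-bit operands print as $xr registers (my example; compile with -mlasx, analogous to the %w/-mlsx case):

#include <lasxintrin.h>

__m256i
add256 (__m256i a, __m256i b)
{
  __m256i c;
  __asm__ ("xvadd.d %u0,%u1,%u2"
	   : "=f" (c)
	   : "f" (a), "f" (b));
  return c;
}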
View file
_service:tar_scm:0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
Added
@@ -0,0 +1,392 @@ +From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 15 Dec 2023 01:49:40 +0800 +Subject: PATCH 083/188 LoongArch: Implement FCCmode reload and + cstore<ANYF:mode>4 + +We used a branch to load floating-point comparison results into GPR. +This is very slow when the branch is not predictable. + +Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM. +Then implement cstore<ANYF:mode>4. + +gcc/ChangeLog: + + * config/loongarch/loongarch-tune.h + (loongarch_rtx_cost_data::movcf2gr): New field. + (loongarch_rtx_cost_data::movcf2gr_): New method. + (loongarch_rtx_cost_data::use_movcf2gr): New method. + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr + to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based + on timing on LA464. + (loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) for LA664. + (loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) + 1. + * config/loongarch/predicates.md (loongarch_fcmp_operator): New + predicate. + * config/loongarch/loongarch.md (movfcc): Change to + define_expand. + (movfcc_internal): New define_insn. + (fcc_to_<X:mode>): New define_insn. + (cstore<ANYF:mode>4): New define_expand. + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs + and GPRs. + (loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR. + (loongarch_emit_float_compare): Call gen_reg_rtx instead of + loongarch_allocate_fcc. + (loongarch_allocate_fcc): Remove. + (loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS. + (loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS. + (loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS, + FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/movcf2gr.c: New test. + * gcc.target/loongarch/movcf2gr-via-fr.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 13 +++- + gcc/config/loongarch/loongarch-tune.h | 15 +++- + gcc/config/loongarch/loongarch.cc | 70 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 69 ++++++++++++++++-- + gcc/config/loongarch/predicates.md | 4 ++ + .../gcc.target/loongarch/movcf2gr-via-fr.c | 10 +++ + gcc/testsuite/gcc.target/loongarch/movcf2gr.c | 9 +++ + 7 files changed, 157 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 4a8885e83..843be78e4 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + int_mult_di (COSTS_N_INSNS (4)), + int_div_si (COSTS_N_INSNS (5)), + int_div_di (COSTS_N_INSNS (5)), ++ movcf2gr (COSTS_N_INSNS (7)), ++ movgr2cf (COSTS_N_INSNS (15)), + branch_cost (6), + memory_latency (4) {} + + /* The following properties cannot be looked up directly using "cpucfg". + So it is necessary to provide a default value for "unknown native" + tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). Currently all numbers are default. */ ++ any known "-mtune" type). 
*/ + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = +- array_tune<loongarch_rtx_cost_data> (); ++ array_tune<loongarch_rtx_cost_data> () ++ .set (CPU_LA664, ++ loongarch_rtx_cost_data () ++ .movcf2gr_ (COSTS_N_INSNS (1)) ++ .movgr2cf_ (COSTS_N_INSNS (1))); + + /* RTX costs to use when optimizing for size. + We use a value slightly larger than COSTS_N_INSNS (1) for all of them +@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .int_mult_si_ (COST_COMPLEX_INSN) + .int_mult_di_ (COST_COMPLEX_INSN) + .int_div_si_ (COST_COMPLEX_INSN) +- .int_div_di_ (COST_COMPLEX_INSN); ++ .int_div_di_ (COST_COMPLEX_INSN) ++ .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index 616b94e87..26f163f0a 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data + unsigned short int_mult_di; + unsigned short int_div_si; + unsigned short int_div_di; ++ unsigned short movcf2gr; ++ unsigned short movgr2cf; + unsigned short branch_cost; + unsigned short memory_latency; + +@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data + return *this; + } + ++ loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr) ++ { ++ movcf2gr = _movcf2gr; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf) ++ { ++ movgr2cf = _movgr2cf; ++ return *this; ++ } ++ + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) + { + branch_cost = _branch_cost; +@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data + memory_latency = _memory_latency; + return *this; + } +- + }; + + /* Costs to use when optimizing for size. */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3aeafeafd..56f631b1a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. */ +- +-static rtx +-loongarch_allocate_fcc (machine_mode mode) +-{ +- unsigned int regno, count; +- +- gcc_assert (TARGET_HARD_FLOAT); +- +- if (mode == FCCmode) +- count = 1; +- else +- gcc_unreachable (); +- +- cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) +- cfun->machine->next_fcc = 0; +- +- regno = FCC_REG_FIRST + cfun->machine->next_fcc; +- cfun->machine->next_fcc += count; +- return gen_rtx_REG (mode, regno); +-} +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + operands for FCMP.cond.fmt, instead a reversed condition code is + required and a test for false. 
*/ + *code = NE; +- *op0 = loongarch_allocate_fcc (FCCmode); ++ *op0 = gen_reg_rtx (FCCmode); + + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); +@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + enum mode_class mclass; + + if (mode == FCCmode) +- return FCC_REG_P (regno); ++ return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno); + + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); +@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from) + /* MOVFR2GR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movcf2gr; ++ + default:
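What cstore<ANYF:mode>4 changes at the source level (my example): materializing a floating-point comparison into an integer no longer needs a branch over an immediate load; it can be an fcmp followed by a movcf2gr-style transfer, which is why the per-CPU movcf2gr cost added above matters.

int
fp_less (double a, double b)
{
  return a < b;   /* branchless: compare into an FCC, then move to a GPR */
}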
View file
_service:tar_scm:0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
Added
@@ -0,0 +1,69 @@ +From 8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 04:26:23 +0800 +Subject: [PATCH 084/188] LoongArch: Add sign_extend pattern for 32-bit rotate + shift + +Remove a redundant sign extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotrsi3_extend): New + define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotrw.c: New test. +--- + gcc/config/loongarch/loongarch.md | 10 ++++++++++ + gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6cf71d9e4..44e8d336a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2893,6 +2893,16 @@ + [(set_attr "type" "shift,shift") + (set_attr "mode" "<MODE>")]) + ++(define_insn "rotrsi3_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I"))))] ++ "TARGET_64BIT" ++ "rotr%i2.w\t%0,%1,%2" ++ [(set_attr "type" "shift,shift") ++ (set_attr "mode" "SI")]) ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c +new file mode 100644 +index 000000000..6ed45e8b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */ ++/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++unsigned ++rotr (unsigned a, unsigned b) ++{ ++ return a >> b | a << 32 - b; ++} ++ ++unsigned ++rotri (unsigned a) ++{ ++ return a >> 5 | a << 27; ++} +-- +2.43.0 +
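Where the new pattern fires (my sketch): on 64-bit targets a 32-bit rotate result consumed as a sign-extended 64-bit value previously cost an extra extension instruction; rotr.w/rotri.w already produce a sign-extended 64-bit result, so the combined pattern drops it.

long
use64 (unsigned a, unsigned b)
{
  return (int) (a >> b | a << (32 - b));   /* single rotr.w, no slli.w */
}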
View file
_service:tar_scm:0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
Added
@@ -0,0 +1,37 @@ +From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Mon, 25 Dec 2023 11:20:23 +0800 +Subject: [PATCH 085/188] LoongArch: Fixed bug in *bstrins_<mode>_for_ior_mask + template. + +We found that using the latest compiled gcc will cause a miscompare error +when running spec2006 400.perlbench test with -flto turned on. After testing, +it was found that only the LoongArch architecture will report errors. +The first error commit was located through the git bisect command as +r14-3773-g5b857e87201335. Through debugging, it was found that the problem +was that the split condition of the *bstrins_<mode>_for_ior_mask template was +empty, which should actually be consistent with the insn condition. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md: Adjust. +--- + gcc/config/loongarch/loongarch.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 44e8d336a..3d5b75825 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1489,7 +1489,7 @@ + "loongarch_pre_reload_split () && \ + loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" +- "" ++ "&& true" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4)) + (match_dup 3))] +-- +2.43.0 +
View file
_service:tar_scm:0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
Added
@@ -0,0 +1,132 @@ +From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:22:03 +0800 +Subject: PATCH 086/188 LoongArch: Fix insn output of vec_concat templates + for LASX. + +When investigaing failure of gcc.dg/vect/slp-reduc-sad.c, following +instruction block are being generated by vec_concatv32qi (which is +generated by vec_initv32qiv16qi) at entrance of foo() function: + + vldx $vr3,$r5,$r6 + vld $vr2,$r5,0 + xvpermi.q $xr2,$xr3,0x20 + +causes the reversion of vec_initv32qiv16qi operation's high and +low 128-bit part. + +According to other target's similar impl and LSX impl for following +RTL representation, current definition in lasx.md of "vec_concat<mode>" +are wrong: + + (set (op0) (vec_concat (op1) (op2))) + +For correct behavior, the last argument of xvpermi.q should be 0x02 +instead of 0x20. This patch fixes this issue and cleanup the vec_concat +template impl. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_concatv4di): Delete. + (vec_concatv8si): Delete. + (vec_concatv16hi): Delete. + (vec_concatv32qi): Delete. + (vec_concatv4df): Delete. + (vec_concatv8sf): Delete. + (vec_concat<mode>): New template with insn output fixed. +--- + gcc/config/loongarch/lasx.md | 74 ++++-------------------------------- + 1 file changed, 7 insertions(+), 67 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 9ca3f9278..46150f2fb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -577,77 +577,17 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_insn "vec_concatv4di" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (vec_concat:V4DI +- (match_operand:V2DI 1 "register_operand" "0") +- (match_operand:V2DI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv8si" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (vec_concat:V8SI +- (match_operand:V4SI 1 "register_operand" "0") +- (match_operand:V4SI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv16hi" +- (set (match_operand:V16HI 0 "register_operand" "=f") +- (vec_concat:V16HI +- (match_operand:V8HI 1 "register_operand" "0") +- (match_operand:V8HI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv32qi" +- (set (match_operand:V32QI 0 "register_operand" "=f") +- (vec_concat:V32QI +- (match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv4df" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (vec_concat:V4DF +- (match_operand:V2DF 1 "register_operand" "0") +- (match_operand:V2DF 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DF")) +- +-(define_insn "vec_concatv8sf" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (vec_concat:V8SF +- (match_operand:V4SF 1 "register_operand" "0") +- (match_operand:V4SF 2 "register_operand" "f"))) 
++(define_insn "vec_concat<mode>" ++ (set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") ++ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f"))) + "ISA_HAS_LASX" + { +- return "xvpermi.q\t%u0,%u2,0x20"; ++ return "xvpermi.q\t%u0,%u2,0x02"; + } + (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) ++ (set_attr "mode" "<MODE>")) + + ;; xshuf.w + (define_insn "lasx_xvperm_<lasxfmt_f_wd>" +-- +2.43.0 +
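The source-level contract of vec_concat, written out as a shuffle (my example, not the patch's test): the first operand must land in the low 128 bits and the second in the high 128 bits. With the destination tied to operand 1, the 0x02 selector keeps operand 1 low and copies operand 2's low part into the high half, while 0x20 produced the swapped order.

typedef long long v2di __attribute__ ((vector_size (16)));
typedef long long v4di __attribute__ ((vector_size (32)));

v4di
concat (v2di a, v2di b)
{
  /* Result lanes: { a[0], a[1], b[0], b[1] }.  */
  return __builtin_shufflevector (a, b, 0, 1, 2, 3);
}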
View file
_service:tar_scm:0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
Added
@@ -0,0 +1,232 @@ +From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:18:44 +0800 +Subject: PATCH 087/188 LoongArch: Fix ICE when passing two same vector + argument consecutively + +Following code will cause ICE on LoongArch target: + + #include <lsxintrin.h> + + extern void bar (__m128i, __m128i); + + __m128i a; + + void + foo () + { + bar (a, a); + } + +It is caused by missing constraint definition in mov<mode>_lsx. This +patch fixes the template and remove the unnecessary processing from +loongarch_split_move () function. + +This patch also cleanup the redundant definition from +loongarch_split_move () and loongarch_split_move_p (). + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + * config/loongarch/loongarch-protos.h + (loongarch_split_move): Remove unnecessary argument. + (loongarch_split_move_insn_p): Delete. + (loongarch_split_move_insn): Delete. + * config/loongarch/loongarch.cc + (loongarch_split_move_insn_p): Delete. + (loongarch_load_store_insns): Use loongarch_split_move_p + directly. + (loongarch_split_move): remove the unnecessary processing. + (loongarch_split_move_insn): Delete. + * config/loongarch/lsx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test. +--- + gcc/config/loongarch/lasx.md | 4 +- + gcc/config/loongarch/loongarch-protos.h | 4 +- + gcc/config/loongarch/loongarch.cc | 49 +------------------ + gcc/config/loongarch/lsx.md | 10 ++-- + .../loongarch/vector/lsx/lsx-mov-1.c | 14 ++++++ + 5 files changed, 24 insertions(+), 57 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 46150f2fb..dbbf5a136 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -839,10 +839,10 @@ + (set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand")) + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands0, operands1)" ++ && loongarch_split_move_p (operands0, operands1)" + (const_int 0) + { +- loongarch_split_move_insn (operands0, operands1, curr_insn); ++ loongarch_split_move (operands0, operands1); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index e5fcf3111..2067e50c3 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); + extern bool loongarch_split_move_p (rtx, rtx); +-extern void loongarch_split_move (rtx, rtx, rtx); ++extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern bool loongarch_split_move_insn_p (rtx, rtx); +-extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 56f631b1a..5c278386a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2558,7 
+2558,6 @@ loongarch_split_const_insns (rtx x) + return low + high; + } + +-bool loongarch_split_move_insn_p (rtx dest, rtx src); + /* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + { + set = single_set (insn); + if (set +- && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) ++ && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + +@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src) + SPLIT_TYPE describes the split condition. */ + + void +-loongarch_split_move (rtx dest, rtx src, rtx insn_) ++loongarch_split_move (rtx dest, rtx src) + { + rtx low_dest; + +@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_) + loongarch_subword (src, true)); + } + } +- +- /* This is a hack. See if the next insn uses DEST and if so, see if we +- can forward SRC for DEST. This is most useful if the next insn is a +- simple store. */ +- rtx_insn *insn = (rtx_insn *) insn_; +- struct loongarch_address_info addr = {}; +- if (insn) +- { +- rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); +- if (next) +- { +- rtx set = single_set (next); +- if (set && SET_SRC (set) == dest) +- { +- if (MEM_P (src)) +- { +- rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), +- true); +- if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) +- validate_change (next, &SET_SRC (set), src, false); +- } +- else +- validate_change (next, &SET_SRC (set), src, false); +- } +- } +- } + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + op2 = gen_int_mode (v, mode); + } + +-/* Return true if a move from SRC to DEST in INSN should be split. */ +- +-bool +-loongarch_split_move_insn_p (rtx dest, rtx src) +-{ +- return loongarch_split_move_p (dest, src); +-} +- +-/* Split a move from SRC to DEST in INSN, given that +- loongarch_split_move_insn_p holds. */ +- +-void +-loongarch_split_move_insn (rtx dest, rtx src, rtx insn) +-{ +- loongarch_split_move (dest, src, insn); +-} +- + /* Implement TARGET_CONSTANT_ALIGNMENT. */ + + static HOST_WIDE_INT +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 7f5fff40a..3e3248ef4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -794,21 +794,21 @@ + }) + + (define_insn "mov<mode>_lsx" +- (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f") +- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r")) ++ (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") ++ (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r")) + "ISA_HAS_LSX" + { return loongarch_output_move (operands0, operands1); } +- (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") ++ (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") + (set_attr "mode" "<MODE>")) + + (define_split + (set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand"))
View file
_service:tar_scm:0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
Added
@@ -0,0 +1,253 @@ +From a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 05:38:20 +0800 +Subject: PATCH 088/188 LoongArch: Expand left rotate to right rotate with + negated amount + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotl<mode>3): + New define_expand. + * config/loongarch/simd.md (vrotl<mode>3): Likewise. + (rotl<mode>3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotl-with-rotr.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-d.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-d.c: New test. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++ + gcc/config/loongarch/simd.md | 29 +++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-rotr.c | 9 ++++++ + .../gcc.target/loongarch/rotl-with-vrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-w.c | 28 ++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-xvrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-w.c | 7 +++++ + 11 files changed, 127 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3d5b75825..ed4d4b906 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2903,6 +2903,18 @@ + (set_attr "type" "shift,shift") + (set_attr "mode" "SI")) + ++;; Expand left rotate to right rotate. ++(define_expand "rotl<mode>3" ++ (set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:GPR 0 "register_operand") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand") ++ (match_dup 3))) ++ "" ++ { ++ operands3 = gen_reg_rtx (SImode); ++ }); ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 13202f79b..93fb39abc 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -268,6 +268,35 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + ++;; Expand left rotate to right rotate. 
++(define_expand "vrotl<mode>3" ++ (set (match_dup 3) ++ (neg:IVEC (match_operand:IVEC 2 "register_operand"))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 3))) ++ "" ++ { ++ operands3 = gen_reg_rtx (<MODE>mode); ++ }); ++ ++;; Expand left rotate with a scalar amount to right rotate: negate the ++;; scalar before broadcasting it because scalar negation is cheaper than ++;; vector negation. ++(define_expand "rotl<mode>3" ++ (set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_dup 4) ++ (vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 4))) ++ "" ++ { ++ operands3 = gen_reg_rtx (SImode); ++ operands4 = gen_reg_rtx (<MODE>mode); ++ }); ++ + ;; <x>vrotri.{b/h/w/d} + + (define_insn "rotr<mode>3" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +new file mode 100644 +index 000000000..84cc53cec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w" } } */ ++ ++unsigned ++t (unsigned a, unsigned b) ++{ ++ return a << b | a >> (32 - b); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +new file mode 100644 +index 000000000..14298bf9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */ ++ ++#define TYPE char ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +new file mode 100644 +index 000000000..0e971b323 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */ ++ ++#define TYPE long long ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +new file mode 100644 +index 000000000..93216ebc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */ ++ ++#define TYPE short ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +new file mode 100644 +index 000000000..d05b86f47 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */ ++ ++#ifndef VLEN ++#define VLEN 16 ++#endif ++ ++#ifndef TYPE ++#define TYPE int ++#endif ++ 
++typedef unsigned TYPE V __attribute__ ((vector_size (VLEN))); ++V a, b, c; ++ ++void ++test (int x) ++{ ++ b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x); ++} ++ ++void ++test2 (void) ++{ ++ for (int i = 0; i < VLEN / sizeof (TYPE); i++) ++ ci = ai << bi | ai >> ((int)sizeof (TYPE) * __CHAR_BIT__ - bi); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
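The identity behind these expanders, as a worked scalar example (mine): for a power-of-two width W, rotl (x, n) equals rotr (x, -n mod W), so only the shift amount needs negating; in the vector-by-scalar case the negation happens once, before the broadcast.

unsigned
rotl32 (unsigned x, unsigned n)
{
  return x << (n & 31) | x >> (-n & 31);
}

unsigned
rotl32_via_rotr (unsigned x, unsigned n)
{
  unsigned m = -n & 31;             /* negated amount */
  return x >> m | x << (-m & 31);   /* rotr by m == rotl by n */
}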
View file
_service:tar_scm:0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
Added
@@ -0,0 +1,104 @@ +From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 27 Dec 2023 04:28:56 +0800 +Subject: PATCH 089/188 LoongArch: Fix infinite secondary reloading of + FCCmode PR113148 + +The GCC internal doc says: + + X might be a pseudo-register or a 'subreg' of a pseudo-register, + which could either be in a hard register or in memory. Use + 'true_regnum' to find out; it will return -1 if the pseudo is in + memory and the hard register number if it is in a register. + +So "MEM_P (x)" is not enough for checking if we are reloading from/to +the memory. This bug has caused reload pass to stall and finally ICE +complaining with "maximum number of generated reload insns per insn +achieved", since r14-6814. + +Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue. + +gcc/ChangeLog: + + PR target/113148 + * config/loongarch/loongarch.cc (loongarch_secondary_reload): + Check if regno == -1 besides MEM_P (x) for reloading FCCmode + from/to FPR to/from memory. + +gcc/testsuite/ChangeLog: + + PR target/113148 + * gcc.target/loongarch/pr113148.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 3 +- + gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++ + 2 files changed, 46 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5c278386a..2e305f940 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) ++ if (reg_class_subset_p (rclass, FP_REGS) ++ && (regno == -1 || MEM_P (x))) + return GR_REGS; + + return NO_REGS; +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c +new file mode 100644 +index 000000000..cf48e5520 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c +@@ -0,0 +1,44 @@ ++/* PR 113148: ICE caused by infinite reloading */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */ ++ ++struct bound ++{ ++ double max; ++} drawQuadrant_bound; ++double w4, innerXfromXY_y, computeBound_right_0; ++struct arc_def ++{ ++ double w, h; ++ double a0, a1; ++}; ++static void drawQuadrant (struct arc_def *); ++static void ++computeBound (struct arc_def *def, struct bound *bound) ++{ ++ double ellipsex_1, ellipsex_0; ++ bound->max = def->a1 ?: __builtin_sin (w4) * def->h; ++ if (def->a0 == 5 && def->w == def->h) ++ ; ++ else ++ ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4); ++ if (def->a1 == 5 && def->w == def->h) ++ ellipsex_1 = bound->max; ++ __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4); ++ computeBound_right_0 = ellipsex_0; ++} ++void ++drawArc () ++{ ++ struct arc_def foo; ++ for (;;) ++ drawQuadrant (&foo); ++} ++void ++drawQuadrant (struct arc_def *def) ++{ ++ int y, miny; ++ computeBound (def, &drawQuadrant_bound); ++ while (y >= miny) ++ ; ++} +-- +2.43.0 +
View file
_service:tar_scm:0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
Added
@@ -0,0 +1,305 @@ +From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 12 Dec 2023 04:54:21 +0800 +Subject: PATCH 090/188 LoongArch: Replace -mexplicit-relocs=auto simple-used + address peephole2 with combine + +The problem with peephole2 is it uses a naive sliding-window algorithm +and misses many cases. For example: + + float a10000; + float t() { return a0 + a8000; } + +is compiled to: + + la.local $r13,a + la.local $r12,a+32768 + fld.s $f1,$r13,0 + fld.s $f0,$r12,-768 + fadd.s $f0,$f1,$f0 + +by trunk. But as we've explained in r14-4851, the following would be +better with -mexplicit-relocs=auto: + + pcalau12i $r13,%pc_hi20(a) + pcalau12i $r12,%pc_hi20(a+32000) + fld.s $f1,$r13,%pc_lo12(a) + fld.s $f0,$r12,%pc_lo12(a+32000) + fadd.s $f0,$f1,$f0 + +However the sliding-window algorithm just won't detect the pcalau12i/fld +pair to be optimized. Use a define_insn_and_rewrite in combine pass +will work around the issue. + +gcc/ChangeLog: + + * config/loongarch/predicates.md + (symbolic_pcrel_offset_operand): New define_predicate. + (mem_simple_ldst_operand): Likewise. + * config/loongarch/loongarch-protos.h + (loongarch_rewrite_mem_for_simple_ldst): Declare. + * config/loongarch/loongarch.cc + (loongarch_rewrite_mem_for_simple_ldst): Implement. + * config/loongarch/loongarch.md (simple_load<mode>): New + define_insn_and_rewrite. + (simple_load_<su>ext<SUBDI:mode><GPR:mode>): Likewise. + (simple_store<mode>): Likewise. + (define_peephole2): Remove la.local/fld peepholes. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c: + New test. + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c: + New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 16 +++ + gcc/config/loongarch/loongarch.md | 114 +++++------------- + gcc/config/loongarch/predicates.md | 13 ++ + ...explicit-relocs-auto-single-load-store-2.c | 11 ++ + ...explicit-relocs-auto-single-load-store-3.c | 18 +++ + 6 files changed, 86 insertions(+), 87 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 2067e50c3..5060efbb6 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); + extern bool loongarch_pre_reload_split (void); + extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *); ++extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx); + + union loongarch_gen_fn_ptrs + { +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 2e305f940..c6318bee9 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + return 0; + } + ++/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto ++ -mcmodel={normal/medium}. 
*/ ++rtx ++loongarch_rewrite_mem_for_simple_ldst (rtx mem) ++{ ++ rtx addr = XEXP (mem, 0); ++ rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_PCALAU12I_GR); ++ rtx new_mem; ++ ++ addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr); ++ new_mem = gen_rtx_MEM (GET_MODE (mem), addr); ++ MEM_COPY_ATTRIBUTES (new_mem, mem); ++ return new_mem; ++} ++ + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed4d4b906..3c61a0cf4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4135,101 +4135,41 @@ + ;; + ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with + ;; 3 instructions). +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (match_dup 0))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) +- (match_operand 3 "const_int_operand")))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (any_extend:GPR (mem:SUBDI (match_dup 0)))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) ++(define_insn_and_rewrite "simple_load<mode>" ++ (set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") ++ (match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" "")) ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ operands1 = loongarch_rewrite_mem_for_simple_ldst (operands1); + }) + +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") ++(define_insn_and_rewrite "simple_load_<su>ext<SUBDI:mode><GPR:mode>" ++ (set (match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR +- (mem:SUBDI (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && 
(TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (match_dup 0)) +- (match_operand:ST_ANY 2 "register_operand")) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0)) \ +- && REGNO (operands0) != REGNO (operands2)"
View file
_service:tar_scm:0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
Added
@@ -0,0 +1,33 @@ +From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 29 Dec 2023 20:04:34 +0800 +Subject: PATCH 091/188 LoongArch: Fix the format of + bstrins_<mode>_for_ior_mask condition (NFC) + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (bstrins_<mode>_for_ior_mask): + For the condition, remove unneeded trailing "\" and move "&&" to + follow GNU coding style. NFC. +--- + gcc/config/loongarch/loongarch.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3c61a0cf4..996df66e8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1486,8 +1486,8 @@ + (match_operand:GPR 2 "const_int_operand")) + (and:GPR (match_operand:GPR 3 "register_operand") + (match_operand:GPR 4 "const_int_operand")))) +- "loongarch_pre_reload_split () && \ +- loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" ++ "loongarch_pre_reload_split () ++ && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" + "&& true" + (set (match_dup 0) (match_dup 1)) +-- +2.43.0 +
View file
_service:tar_scm:0092-LoongArch-Added-TLS-Le-Relax-support.patch
Added
@@ -0,0 +1,280 @@ +From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 12 Dec 2023 16:32:31 +0800 +Subject: PATCH 092/188 LoongArch: Added TLS Le Relax support. + +Check whether the assembler supports tls le relax. If it supports it, the assembly +instruction sequence of tls le relax will be generated by default. + +The original way to obtain the tls le symbol address: + lu12i.w $rd, %le_hi20(sym) + ori $rd, $rd, %le_lo12(sym) + add.{w/d} $rd, $rd, $tp + +If the assembler supports tls le relax, the following sequence is generated: + + lu12i.w $rd, %le_hi20_r(sym) + add.{w/d} $rd,$rd,$tp,%le_add_r(sym) + addi.{w/d} $rd,$rd,%le_lo12_r(sym) + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define. + * config/loongarch/loongarch.cc (loongarch_legitimize_tls_address): + Added TLS Le Relax support. + (loongarch_print_operand_reloc): Add the output string of TLS Le Relax. + * config/loongarch/loongarch.md (@add_tls_le_relax<mode>): New template. + * configure: Regenerate. + * configure.ac: Check if binutils supports TLS le relax. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp: Add a function to check whether binutil supports + TLS Le Relax. + * gcc.target/loongarch/tls-le-relax.c: New test. +--- + gcc/config.in | 6 +++ + gcc/config/loongarch/loongarch-opts.h | 4 ++ + gcc/config/loongarch/loongarch.cc | 46 +++++++++++++++++-- + gcc/config/loongarch/loongarch.md | 12 +++++ + gcc/configure | 31 +++++++++++++ + gcc/configure.ac | 5 ++ + .../gcc.target/loongarch/tls-le-relax.c | 12 +++++ + gcc/testsuite/lib/target-supports.exp | 12 +++++ + 8 files changed, 125 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c + +diff --git a/gcc/config.in b/gcc/config.in +index 033cfb98b..7220b2b2b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -771,6 +771,12 @@ + #endif + + ++/* Define if your assembler supports tls le relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_TLS_LE_RELAXATION ++#endif ++ ++ + /* Define if your assembler supports vl/vst/vlm/vstm with an optional + alignment hint argument. */ + #ifndef USED_FOR_TARGET +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 639ed50bd..8491bee0d 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -114,4 +114,8 @@ struct loongarch_flags { + #define HAVE_AS_TLS 0 + #endif + ++#ifndef HAVE_AS_TLS_LE_RELAXATION ++#define HAVE_AS_TLS_LE_RELAXATION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c6318bee9..d1b1950dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc) + + case TLS_MODEL_LOCAL_EXEC: + { +- /* la.tls.le; tp-relative add. */ ++ /* la.tls.le; tp-relative add. 
++ ++ normal: ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ add.{w/d} $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ tls le relax: ++ lu12i.w $rd, %le_hi20_r(sym) ++ add.{w/d} $rd,$rd,$tp ++ addi.{w/d} $rd,$rd,%le_lo12_r(sym) ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ extreme (When the code model is set to extreme, the TLS le Relax ++ instruction sequence is not generated): ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ lu32i.d $rd, %le64_lo20(sym) ++ lu52i.d $rd, $rd, %le64_hi12(sym) ++ add.d $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) */ ++ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc) + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +- emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); ++ ++ /* The assembler does not implement tls le relax support when the ++ code model is extreme, so when the code model is extreme, the ++ old symbol address acquisition method is still used. */ ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_add_tls_le_relax (Pmode, dest, high, ++ tp, loc)); ++ loongarch_emit_move (dest, ++ gen_rtx_LO_SUM (Pmode, dest, tmp2)); ++ return dest; ++ } ++ else ++ emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); + + if (TARGET_CMODEL_EXTREME) + { +@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + gcc_unreachable (); + } + else +- reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ { ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r"; ++ else ++ reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ } + break; + + case SYMBOL_TLSGD: +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 996df66e8..02c537d4c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -73,6 +73,7 @@ + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I + UNSPEC_PCALAU12I_GR ++ UNSPEC_ADD_TLS_LE_RELAX + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2503,6 +2504,17 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + (set_attr "type" "move")) + ++(define_insn "@add_tls_le_relax<mode>" ++ (set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P (match_operand:P 1 "register_operand" "r") ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "symbolic_operand") ++ UNSPEC_ADD_TLS_LE_RELAX)) ++ "HAVE_AS_TLS_LE_RELAXATION" ++ "add.<d>\t%0,%1,%2,%%le_add_r(%3)" ++ (set_attr "type" "move") ++) ++ + (define_insn "@ori_l_lo12<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P (match_operand:P 1 "register_operand" "r") +diff --git a/gcc/configure b/gcc/configure +index 5842e7a18..eecfe60d6 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then + + $as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for tls le relaxation support" >&5 ++$as_echo_n "checking assembler for tls le relaxation support... 
" >&6; } ++if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_tls_le_relaxation_support=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5
View file
_service:tar_scm:0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
Added
@@ -0,0 +1,112 @@ +From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 30 Dec 2023 21:40:11 +0800 +Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors + +We already had smin/smax RTL pattern using vfmin/vfmax instructions. +But for smin/smax, it's unspecified what will happen if either operand +contains any NaN operands. So we would not vectorize the loop with +-fno-finite-math-only (the default for all optimization levels except +-Ofast). + +But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we +can also use them and vectorize the loop. + +gcc/ChangeLog: + + * config/loongarch/simd.md (fmax<mode>3): New define_insn. + (fmin<mode>3): Likewise. + (reduc_fmax_scal_<mode>3): New define_expand. + (reduc_fmin_scal_<mode>3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfmax-vfmin.c: New test. +--- + gcc/config/loongarch/simd.md | 31 +++++++++++++++++++ + .../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++ + 2 files changed, 62 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 93fb39abc..8ac1d75a8 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -426,6 +426,37 @@ + (set_attr "type" "simd_fcmp") + (set_attr "mode" "<MODE>")]) + ++; xvf{min/max} instructions are IEEE-754-2008 conforming, use them for ++; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead ++; of smin/smax though, see PR105414 and PR107013. ++ ++(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN]) ++(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")]) ++ ++(define_insn "<fmaxmin><mode>3" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")] ++ UNSPEC_FMAXMIN))] ++ "" ++ "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2" ++ [(set_attr "type" "simd_fminmax") ++ (set_attr "mode" "<MODE>")]) ++ ++;; ... and also reduc operations. ++(define_expand "reduc_<fmaxmin>_scal_<mode>" ++ [(match_operand:<UNITMODE> 0 "register_operand") ++ (match_operand:FVEC 1 "register_operand") ++ (const_int UNSPEC_FMAXMIN)] ++ "" ++{ ++ rtx tmp = gen_reg_rtx (<MODE>mode); ++ loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]); ++ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. 
+ (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +new file mode 100644 +index 000000000..811fee361 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464 -mlasx" } */ ++/* { dg-final { scan-assembler "\tvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.s" } } */ ++ ++#define T(OP) __typeof__ (__builtin_##OP (0, 0)) ++ ++#define TEST(OP, LEN) \ ++void \ ++test_##OP##LEN (T (OP) *restrict dest, \ ++ const T (OP) *restrict src1, \ ++ const T (OP) *restrict src2) \ ++{ \ ++ for (int i = 0; i < LEN / sizeof (T(OP)); i++) \ ++ dest[i] = __builtin_##OP (src1[i], src2[i]); \ ++} ++ ++TEST(fmin, 16) ++TEST(fmax, 16) ++TEST(fmin, 32) ++TEST(fmax, 32) ++TEST(fminf, 16) ++TEST(fmaxf, 16) ++TEST(fminf, 32) ++TEST(fmaxf, 32) +-- +2.43.0 +
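Editor's note: the practical effect of this patch is that NaN-correct min/max loops can now be vectorized without -ffinite-math-only. A minimal sketch of ours (the patch's own test uses the macro form shown above):

#include <math.h>

/* With only smin/smax patterns this loop stays scalar unless NaNs are
   ruled out; with the IEEE-754-2008 vfmin pattern it can use
   vfmin.d/xvfmin.d.  */
void
vmin (double *restrict d, const double *restrict a,
      const double *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = fmin (a[i], b[i]);
}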
View file
_service:tar_scm:0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
Added
@@ -0,0 +1,1484 @@ +From 06a6a571fd557b53f805d990dd1a40a2ab7c1e5c Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 28 Dec 2023 20:26:46 +0800 +Subject: PATCH 094/188 LoongArch: Merge constant vector permuatation + implementations. + +There are currently two versions of the implementations of constant +vector permutation: loongarch_expand_vec_perm_const_1 and +loongarch_expand_vec_perm_const_2. The implementations of the two +versions are different. Currently, only the implementation of +loongarch_expand_vec_perm_const_1 is used for 256-bit vectors. We +hope to streamline the code as much as possible while retaining the +better-performing implementation of the two. By repeatedly testing +spec2006 and spec2017, we got the following Merged version. +Compared with the pre-merger version, the number of lines of code +in loongarch.cc has been reduced by 888 lines. At the same time, +the performance of SPECint2006 under Ofast has been improved by 0.97%, +and the performance of SPEC2017 fprate has been improved by 0.27%. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_is_odd_extraction): + Remove useless forward declaration. + (loongarch_is_even_extraction): Remove useless forward declaration. + (loongarch_try_expand_lsx_vshuf_const): Removed. + (loongarch_expand_vec_perm_const_1): Merged. + (loongarch_is_double_duplicate): Removed. + (loongarch_is_center_extraction): Ditto. + (loongarch_is_reversing_permutation): Ditto. + (loongarch_is_di_misalign_extract): Ditto. + (loongarch_is_si_misalign_extract): Ditto. + (loongarch_is_lasx_lowpart_extract): Ditto. + (loongarch_is_op_reverse_perm): Ditto. + (loongarch_is_single_op_perm): Ditto. + (loongarch_is_divisible_perm): Ditto. + (loongarch_is_triple_stride_extract): Ditto. + (loongarch_expand_vec_perm_const_2): Merged. + (loongarch_expand_vec_perm_const): New. + (loongarch_vectorize_vec_perm_const): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 1308 +++++------------------------ + 1 file changed, 210 insertions(+), 1098 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d1b1950dc..9d2374a46 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8823,143 +8823,6 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + } + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) +-{ +- int i; +- rtx target, op0, op1, sel, tmp; +- rtx rpermMAX_VECT_LEN; +- +- if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode +- || d->vmode == E_V4SImode || d->vmode == E_V4SFmode +- || d->vmode == E_V8HImode || d->vmode == E_V16QImode) +- { +- target = d->target; +- op0 = d->op0; +- op1 = d->one_vector_p ? d->op0 : d->op1; +- +- if (GET_MODE (op0) != GET_MODE (op1) +- || GET_MODE (op0) != GET_MODE (target)) +- return false; +- +- if (d->testing_p) +- return true; +- +- /* If match extract-even and extract-odd permutations pattern, use +- * vselect much better than vshuf. 
*/ +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) +- return true; +- +- unsigned char perm2MAX_VECT_LEN; +- for (i = 0; i < d->nelt; ++i) +- perm2i = (d->permi + d->nelt) & (2 * d->nelt - 1); +- +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) +- return true; +- } +- +- for (i = 0; i < d->nelt; i += 1) +- { +- rpermi = GEN_INT (d->permi); +- } +- +- if (d->vmode == E_V2DFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else if (d->vmode == E_V4SFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else +- { +- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); +- emit_move_insn (d->target, sel); +- } +- +- switch (d->vmode) +- { +- case E_V2DFmode: +- emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); +- break; +- case E_V2DImode: +- emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); +- break; +- case E_V4SFmode: +- emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); +- break; +- case E_V4SImode: +- emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); +- break; +- case E_V8HImode: +- emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); +- break; +- case E_V16QImode: +- emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); +- break; +- default: +- break; +- } +- +- return true; +- } +- return false; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2MAX_VECT_LEN; +- +- if (d->one_vector_p) +- { +- /* Try interleave with alternating operands. */ +- memcpy (perm2, d->perm, sizeof (perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2i += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, +- nelt)) +- return true; +- } +- else +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; +- +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2i = (d->permi + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, +- nelt)) +- return true; +- } +- +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- if (loongarch_expand_vec_perm_interleave (d)) +- return true; +- return false; +-} +- + /* Following are the assist function for const vector permutation support. */ + static bool + loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) +@@ -8991,36 +8854,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_double_duplicate (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p)
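Editor's note: the rest of this large patch is truncated in this view. For reference, constant permutations of the kind the merged loongarch_expand_vec_perm_const expander handles can be written portably with GCC's __builtin_shufflevector (available since GCC 12); a small sketch of ours, not taken from the patch:

typedef int v4si __attribute__ ((vector_size (16)));

/* Even-element extraction from the concatenation of a and b; the
   constant selector {0, 2, 4, 6} is expanded through the target's
   vec_perm_const hook.  */
v4si
even_extract (v4si a, v4si b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}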
View file
_service:tar_scm:0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
Added
@@ -0,0 +1,34 @@ +From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 29 Dec 2023 09:45:15 +0800 +Subject: PATCH 095/188 LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file. + +After implementing the cost model on the LoongArch architecture, the GCC +compiler code has this feature turned on by default, which causes the +lasx-xvstelm.c file test to fail. Through analysis, this test case can +generate vectorization instructions required for detection only after +disabling the functionality of the cost model with the "-fno-vect-cost-model" +compilation option. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvstelm.c:Add compile + option "-fno-vect-cost-model" to dg-options. +--- + gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +index 1a7b0e86f..4b846204a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mlasx" } */ ++/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */ + /* { dg-final { scan-assembler-times "xvstelm.w" 8} } */ + + #define LEN 256 +-- +2.43.0 +
View file
_service:tar_scm:0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
Added
@@ -0,0 +1,47 @@ +From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:24 +0800 +Subject: PATCH 096/188 LoongArch: testsuite:Modify the test behavior of the + vect-bic-bitmask-{12, 23}.c file. + +Before modifying the test behavior of the program, dg-do is set to assemble in +vect-bic-bitmask-{12,23}.c. However, when the binutils library does not support +the vector instruction set, it will FAIL to recognize the vector instruction +and fail item will appear in the assembly stage. So set the program's dg-do to +compile. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/vect-bic-bitmask-12.c: Change the default + setting of assembly to compile. + * gcc.dg/vect/vect-bic-bitmask-23.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +index 36ec5a8b1..213e4c2a4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */ + + #include <stdint.h> +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +index 5b4c3b6e1..5dceb4bbc 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */ + + #include <stdint.h> +-- +2.43.0 +
View file
_service:tar_scm:0097-Improve-non-loop-disambiguation.patch
Added
@@ -0,0 +1,101 @@ +From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 19:45:43 +0800 +Subject: [PATCH] Improve non-loop disambiguation + +This optimization is brought from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa. + +When dr_may_alias_p is called without a loop context, it tries +to use the tree-affine interface to calculate the difference +between the two addresses and use that difference to check whether +the gap between the accesses is known at compile time. However, as the +example in the PR shows, this doesn't expand SSA_NAMEs and so can easily +be defeated by things like reassociation. + +One fix would have been to use aff_combination_expand to expand the +SSA_NAMEs, but we'd then need some way of maintaining the associated +cache. This patch instead reuses the innermost_loop_behavior fields +(which exist even when no loop context is provided). + +It might still be useful to do the aff_combination_expand thing too, +if an example turns out to need it. +--- + gcc/common.opt | 4 ++++ + gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++ + gcc/tree-data-ref.cc | 22 +++++++++++++++++++++ + 3 files changed, 42 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..75bf9c9c1 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3217,6 +3217,10 @@ ftree-loop-vectorize + Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable loop vectorization on trees. + ++falias-analysis-expand-ssa ++Common Var(flag_alias_analysis_expand_ssa) Init(0) ++Enable expanded SSA name analysis during alias analysis. ++ + ftree-slp-vectorize + Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c +new file mode 100644 +index 000000000..5ff8a8a62 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-falias-analysis-expand-ssa" } */ ++ ++void f(double *p, long i) ++{ ++ p[i+0] += 1; ++ p[i+1] += 1; ++} ++void g(double *p, long i) ++{ ++ double *q = p + i; ++ q[0] += 1; ++ q[1] += 1; ++} ++ ++/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */ +diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc +index e6ae9e847..a05073c51 100644 +--- a/gcc/tree-data-ref.cc ++++ b/gcc/tree-data-ref.cc +@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, + disambiguation. 
*/ + if (!loop_nest) + { ++ if (flag_alias_analysis_expand_ssa) ++ { ++ tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a))); ++ tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b))); ++ ++ if (DR_BASE_ADDRESS (a) ++ && DR_BASE_ADDRESS (b) ++ && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b)) ++ && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b)) ++ && poly_int_tree_p (tree_size_a) ++ && poly_int_tree_p (tree_size_b) ++ && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)), ++ wi::to_widest (tree_size_a), ++ wi::to_widest (DR_INIT (b)), ++ wi::to_widest (tree_size_b))) ++ { ++ gcc_assert (integer_zerop (DR_STEP (a)) ++ && integer_zerop (DR_STEP (b))); ++ return false; ++ } ++ } ++ + aff_tree off1, off2; + poly_widest_int size1, size2; + get_inner_reference_aff (DR_REF (a), &off1, &size1); +-- +2.33.0 +
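Editor's note: the added early-out boils down to a constant interval-overlap test on DR_INIT and the access sizes once the base addresses and offsets compare equal. A scalar sketch of ours of that test (GCC's ranges_maybe_overlap_p performs the same check on poly_ints):

#include <stdbool.h>

/* Accesses of size1/size2 bytes at constant byte offsets init1/init2 from
   the same base can alias only if the half-open intervals
   [init1, init1+size1) and [init2, init2+size2) overlap.  */
static bool
intervals_overlap (long init1, long size1, long init2, long size2)
{
  return init1 < init2 + size2 && init2 < init1 + size1;
}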
View file
_service:tar_scm:0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
Added
@@ -0,0 +1,31 @@ +From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:27 +0800 +Subject: PATCH 097/188 LoongArch: testsuite:Delete the default run behavior + in pr60510.f. + +When binutils does not support vector instruction sets, the test program fails +because it does not recognize vectorization at the assembly stage. Therefore, +the default run behavior of the program is deleted, so that the behavior of +the program depends on whether the software supports vectorization. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/pr60510.f: Delete the default behavior of the + program. +--- + gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f +index ecd50dd55..c1e11b27d 100644 +--- a/gcc/testsuite/gfortran.dg/vect/pr60510.f ++++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f +@@ -1,4 +1,3 @@ +-! { dg-do run } + ! { dg-require-effective-target vect_double } + ! { dg-require-effective-target vect_intdouble_cvt } + ! { dg-additional-options "-fno-inline -ffast-math" } +-- +2.43.0 +
View file
_service:tar_scm:0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
Deleted
@@ -1,378 +0,0 @@ -From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:48 +0100 -Subject: PATCH aarch64: Use local frame vars in shrink-wrapping code - -aarch64_layout_frame uses a shorthand for referring to -cfun->machine->frame: - - aarch64_frame &frame = cfun->machine->frame; - -This patch does the same for some other heavy users of the structure. -No functional change intended. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use - a local shorthand for cfun->machine->frame. - (aarch64_restore_callee_saves, aarch64_get_separate_components): - (aarch64_process_components): Likewise. - (aarch64_allocate_and_probe_stack_space): Likewise. - (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. - (aarch64_layout_frame): Use existing shorthand for one more case. ---- - gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- - 1 file changed, 64 insertions(+), 59 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 226dc9dffd47..ae42ffdedbeb 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void) - frame.is_scs_enabled - = (!crtl->calls_eh_return - && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) -- && known_ge (cfun->machine->frame.reg_offsetLR_REGNUM, 0)); -+ && known_ge (frame.reg_offsetLR_REGNUM, 0)); - - /* When shadow call stack is enabled, the scs_pop in the epilogue will - restore x30, and we don't need to pop x30 again in the traditional -@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - unsigned start, unsigned limit, bool skip_wb, - bool hard_fp_valid_p) - { -+ aarch64_frame &frame = cfun->machine->frame; - rtx_insn *insn; - unsigned regno; - unsigned regno2; -@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, - bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - - if (skip_wb -- && (regno == cfun->machine->frame.wb_push_candidate1 -- || regno == cfun->machine->frame.wb_push_candidate2)) -+ && (regno == frame.wb_push_candidate1 -+ || regno == frame.wb_push_candidate2)) - continue; - - if (cfun->machine->reg_is_wrapped_separatelyregno) -@@ -8786,7 +8787,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + cfun->machine->frame.reg_offsetregno; -+ offset = start_offset + frame.reg_offsetregno; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - { - gcc_assert (known_eq (start_offset, 0)); - poly_int64 fp_offset -- = cfun->machine->frame.below_hard_fp_saved_regs_size; -+ = frame.below_hard_fp_saved_regs_size; - if (hard_fp_valid_p) - base_rtx = hard_frame_pointer_rtx; - else -@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separatelyregno2 - && known_eq (GET_MODE_SIZE (mode), -- cfun->machine->frame.reg_offsetregno2 -- - cfun->machine->frame.reg_offsetregno)) -+ frame.reg_offsetregno2 - frame.reg_offsetregno)) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; -@@ -8872,6 +8872,7 @@ static void - aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - unsigned limit, bool skip_wb, rtx *cfi_ops) - { -+ 
aarch64_frame &frame = cfun->machine->frame; - unsigned regno; - unsigned regno2; - poly_int64 offset; -@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - rtx reg, mem; - - if (skip_wb -- && (regno == cfun->machine->frame.wb_pop_candidate1 -- || regno == cfun->machine->frame.wb_pop_candidate2)) -+ && (regno == frame.wb_pop_candidate1 -+ || regno == frame.wb_pop_candidate2)) - continue; - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + cfun->machine->frame.reg_offsetregno; -+ offset = start_offset + frame.reg_offsetregno; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separatelyregno2 - && known_eq (GET_MODE_SIZE (mode), -- cfun->machine->frame.reg_offsetregno2 -- - cfun->machine->frame.reg_offsetregno)) -+ frame.reg_offsetregno2 - frame.reg_offsetregno)) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; -@@ -9011,6 +9011,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) - static sbitmap - aarch64_get_separate_components (void) - { -+ aarch64_frame &frame = cfun->machine->frame; - sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); - bitmap_clear (components); - -@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void) - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - continue; - -- poly_int64 offset = cfun->machine->frame.reg_offsetregno; -+ poly_int64 offset = frame.reg_offsetregno; - - /* If the register is saved in the first SVE save slot, we use - it as a stack probe for -fstack-clash-protection. */ - if (flag_stack_clash_protection -- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) -+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) - && known_eq (offset, 0)) - continue; - - /* Get the offset relative to the register we'll use. */ - if (frame_pointer_needed) -- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ offset -= frame.below_hard_fp_saved_regs_size; - else - offset += crtl->outgoing_args_size; - -@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void) - /* If the spare predicate register used by big-endian SVE code - is call-preserved, it must be saved in the main prologue - before any saves that use it. */ -- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) -- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); -+ if (frame.spare_pred_reg != INVALID_REGNUM) -+ bitmap_clear_bit (components, frame.spare_pred_reg); - -- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; -- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; -+ unsigned reg1 = frame.wb_push_candidate1; -+ unsigned reg2 = frame.wb_push_candidate2; - /* If registers have been chosen to be stored/restored with - writeback don't interfere with them to avoid having to output explicit - stack adjustment instructions. */ -@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) - static void - aarch64_process_components (sbitmap components, bool prologue_p) - { -+ aarch64_frame &frame = cfun->machine->frame; - rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed - ? 
HARD_FRAME_POINTER_REGNUM - : STACK_POINTER_REGNUM); -@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) - machine_mode mode = aarch64_reg_save_mode (regno); - - rtx reg = gen_rtx_REG (mode, regno); -- poly_int64 offset = cfun->machine->frame.reg_offsetregno; -+ poly_int64 offset = frame.reg_offsetregno; - if (frame_pointer_needed) -- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ offset -= frame.below_hard_fp_saved_regs_size; - else - offset += crtl->outgoing_args_size; - -@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) - break; - } - -- poly_int64 offset2 = cfun->machine->frame.reg_offsetregno2; -+ poly_int64 offset2 = frame.reg_offsetregno2; - /* The next register is not of the same class or its offset is not - mergeable with the current one into a pair. */ - if (aarch64_sve_mode_p (mode) - || !satisfies_constraint_Ump (mem) - || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) - || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) -- || maybe_ne ((offset2 - cfun->machine->frame.reg_offsetregno),
View file
_service:tar_scm:0098-CHREC-multiplication-and-undefined-overflow.patch
Added
@@ -0,0 +1,261 @@ +From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 20:16:52 +0800 +Subject: [PATCH] CHREC multiplication and undefined overflow + +This optimization is brought from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html + +When folding a multiply CHRECs are handled like {a, +, b} * c +is {a*c, +, b*c} but that isn't generally correct when overflow +invokes undefined behavior. The following uses unsigned arithmetic +unless either a is zero or a and b have the same sign. + +I've used simple early outs for INTEGER_CSTs and otherwise use +a range-query since we lack a tree_expr_nonpositive_p and +get_range_pos_neg isn't a good fit. +--- + gcc/common.opt | 4 ++ + gcc/testsuite/gcc.dg/pr68317.c | 6 +- + gcc/testsuite/gcc.dg/torture/pr114074.c | 27 ++++++++++ + gcc/tree-chrec.cc | 81 +++++++++++++++++++++---- + gcc/tree-chrec.h | 2 +- + gcc/value-range.cc | 12 ++++ + gcc/value-range.h | 2 + + 7 files changed, 119 insertions(+), 15 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..d3af3ba39 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1771,6 +1771,10 @@ floop-interchange + Common Var(flag_loop_interchange) Optimization + Enable loop interchange on trees. + ++fchrec-mul-fold-strict-overflow ++Common Var(flag_chrec_mul_fold_strict_overflow) Init(0) ++Enable strict overflow handling during constant folding of multiply CHRECs. ++ + floop-block + Common Alias(floop-nest-optimize) + Enable loop nest transforms. Same as -floop-nest-optimize. +diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c +index bd053a752..671a67d95 100644 +--- a/gcc/testsuite/gcc.dg/pr68317.c ++++ b/gcc/testsuite/gcc.dg/pr68317.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdisable-tree-ethread" } */ ++/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */ + + /* Note: Threader will collapse loop. */ + +@@ -12,8 +12,8 @@ foo () + { + int32_t index = 0; + +- for (index; index <= 10; index--) // expected warning here ++ for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */ + /* Result of the following multiply will overflow + when converted to signed int32_t. */ +- bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */ ++ bar ((0xcafe + index) * 0xdead); +diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c +new file mode 100644 +index 000000000..9a383d8fc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr114074.c +@@ -0,0 +1,27 @@ ++/* { dg-do run } */ ++/* { dg-options "-fchrec-mul-fold-strict-overflow" } */ ++int a, b, d; ++ ++__attribute__((noipa)) void ++foo (void) ++{ ++ ++d; ++} ++ ++int ++main () ++{ ++ for (a = 0; a > -3; a -= 2) ++ { ++ int c = a; ++ b = __INT_MAX__ - 3000; ++ a = ~c * b; ++ foo (); ++ if (!a) ++ break; ++ a = c; ++ } ++ if (d != 2) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc +index c44cea754..3323901bc 100644 +--- a/gcc/tree-chrec.cc ++++ b/gcc/tree-chrec.cc +@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3. 
If not see + #include "gimple.h" + #include "tree-ssa-loop.h" + #include "dumpfile.h" ++#include "value-range.h" ++#include "value-query.h" + #include "tree-scalar-evolution.h" + + /* Extended folder for chrecs. */ +@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type, + || automatically_generated_chrec_p (op1)) + return chrec_fold_automatically_generated_operands (op0, op1); + ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ if (TREE_CODE (op0) != POLYNOMIAL_CHREC ++ && TREE_CODE (op1) == POLYNOMIAL_CHREC) ++ std::swap (op0, op1); ++ } ++ + switch (TREE_CODE (op0)) + { + case POLYNOMIAL_CHREC: +@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type, + if (integer_zerop (op1)) + return build_int_cst (type, 0); + +- return build_polynomial_chrec +- (CHREC_VARIABLE (op0), +- chrec_fold_multiply (type, CHREC_LEFT (op0), op1), +- chrec_fold_multiply (type, CHREC_RIGHT (op0), op1)); ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the ++ same sign or CHREC_LEFT is zero then folding the multiply into ++ the addition does not have the same behavior on overflow. Use ++ unsigned arithmetic in that case. */ ++ value_range rl, rr; ++ if (!ANY_INTEGRAL_TYPE_P (type) ++ || TYPE_OVERFLOW_WRAPS (type) ++ || integer_zerop (CHREC_LEFT (op0)) ++ || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST ++ && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST ++ && (tree_int_cst_sgn (CHREC_LEFT (op0)) ++ == tree_int_cst_sgn (CHREC_RIGHT (op0)))) ++ || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0)) ++ && !rl.undefined_p () ++ && (rl.nonpositive_p () || rl.nonnegative_p ()) ++ && get_range_query (cfun)->range_of_expr (rr, ++ CHREC_RIGHT (op0)) ++ && !rr.undefined_p () ++ && ((rl.nonpositive_p () && rr.nonpositive_p ()) ++ || (rl.nonnegative_p () && rr.nonnegative_p ())))) ++ { ++ tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1); ++ tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1); ++ return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right); ++ } ++ else ++ { ++ tree utype = unsigned_type_for (type); ++ tree uop1 = chrec_convert_rhs (utype, op1); ++ tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0)); ++ tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0)); ++ tree left = chrec_fold_multiply (utype, uleft0, uop1); ++ tree right = chrec_fold_multiply (utype, uright0, uop1); ++ tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0), ++ left, right); ++ return chrec_convert_rhs (type, tem); ++ } ++ } ++ else ++ { ++ return build_polynomial_chrec ++ (CHREC_VARIABLE (op0), ++ chrec_fold_multiply (type, CHREC_LEFT (op0), op1), ++ chrec_fold_multiply (type, CHREC_RIGHT (op0), op1)); ++ } + } + + CASE_CONVERT: +@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type, + switch (TREE_CODE (op1)) + { + case POLYNOMIAL_CHREC: +- gcc_checking_assert +- (!chrec_contains_symbols_defined_in_loop (op1, +- CHREC_VARIABLE (op1))); +- return build_polynomial_chrec +- (CHREC_VARIABLE (op1), +- chrec_fold_multiply (type, CHREC_LEFT (op1), op0), +- chrec_fold_multiply (type, CHREC_RIGHT (op1), op0)); ++ if (flag_chrec_mul_fold_strict_overflow) ++ { ++ gcc_unreachable ();
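Editor's note: this patch is truncated above. The unsigned fallback it adds folds {a, +, b} * c as {a*c, +, b*c} in the corresponding unsigned type, so a transient overflow wraps instead of invoking undefined behavior, and the result is converted back afterwards. A scalar sketch of ours of that idea (not the tree-level code):

/* Value of ({a, +, b} * c) at iteration n, i.e. (a + n*b) * c, evaluated
   through the folded recurrence {a*c, +, b*c} in unsigned arithmetic.  */
int
chrec_mul_at (int n, int a, int b, int c)
{
  unsigned acc  = (unsigned) a * (unsigned) c;
  unsigned step = (unsigned) b * (unsigned) c;  /* may wrap, never UB */
  for (int i = 0; i < n; i++)
    acc += step;
  return (int) acc;  /* conversion back is implementation-defined, not UB */
}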
View file
_service:tar_scm:0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
Added
@@ -0,0 +1,157 @@ +From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:28 +0800 +Subject: PATCH 098/188 LoongArch: testsuite:Added additional vectorization + "-mlasx" compilation option. + +In the LoongArch architecture, the reason for not adding the 128-bit +vector-width-*hi* instruction template in the GCC back end is that it causes +program performance loss, so we can only add the "-mlasx" compilation option +to use 256-bit vectorization functions in test files. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the + LoongArch architecture, you need to add the "-mlasx" compilation + option to generate vectorized code. + * gcc.dg/vect/slp-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-s16.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-half-u8.c: Dito. + * gcc.dg/vect/vect-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-u32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c | 1 + + gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c | 1 + + 10 files changed, 10 insertions(+) + +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +index 47b1a4366..52ffca82a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +index e3bfee333..cd44e551f 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +index 4c95dd201..082c758cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +index 4075f815c..a95e617ad 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +index c4ac88e18..14d96645a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +index ebbf4f5e8..7901dae85 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +index 2e28baae0..21b39953e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +index d277f0b2b..4827e11b2 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +index f50358802..87eb9e0cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include <stdarg.h> + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +index 03d137941..507d30c35 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include <stdarg.h> + #include "tree-vect.h" +-- +2.43.0 +
View file
_service:tar_scm:0098-aarch64-Avoid-a-use-of-callee-offset.patch
Deleted
@@ -1,73 +0,0 @@ -From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Avoid a use of callee_offset - -When we emit the frame chain, i.e. when we reach Here in this statement -of aarch64_expand_prologue: - - if (emit_frame_chain) - { - // Here - ... - } - -the stack is in one of two states: - -- We've allocated up to the frame chain, but no more. - -- We've allocated the whole frame, and the frame chain is within easy - reach of the new SP. - -The offset of the frame chain from the current SP is available -in aarch64_frame as callee_offset. It is also available as the -chain_offset local variable, where the latter is calculated from other -data. (However, chain_offset is not always equal to callee_offset when -!emit_frame_chain, so chain_offset isn't redundant.) - -In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using -chain_offset for the initialisation of the hard frame pointer: - - aarch64_add_offset (Pmode, hard_frame_pointer_rtx, -- stack_pointer_rtx, callee_offset, -+ stack_pointer_rtx, chain_offset, - tmp1_rtx, tmp0_rtx, frame_pointer_needed); - -But the later REG_CFA_ADJUST_CFA handling still used callee_offset. - -I think the difference is harmless, but it's more logical for the -CFA note to be in sync, and it's more convenient for later patches -if it uses chain_offset. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use - chain_offset rather than callee_offset. ---- - gcc/config/aarch64/aarch64.cc | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ae42ffdedbeb..79253322fd7c 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void) - poly_int64 initial_adjust = frame.initial_adjust; - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; -- poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; - poly_int64 below_hard_fp_saved_regs_size - = frame.below_hard_fp_saved_regs_size; -@@ -9783,8 +9782,7 @@ aarch64_expand_prologue (void) - implicit. */ - if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) - { -- rtx src = plus_constant (Pmode, stack_pointer_rtx, -- callee_offset); -+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); - add_reg_note (insn, REG_CFA_ADJUST_CFA, - gen_rtx_SET (hard_frame_pointer_rtx, src)); - } --- -2.43.5 -
View file
_service:tar_scm:0099-Enable-Transposed-SLP.patch
Added
@@ -0,0 +1,5624 @@ +From 0dd3b8532f35486bd5db2c71342c8dfed4c0893a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Thu, 25 Jul 2024 17:25:23 +0800 +Subject: PATCH Enable Transposed SLP. + +--- + gcc/common.opt | 4 + + gcc/testsuite/gcc.dg/vect/transpose-1.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-2.c | 50 + + gcc/testsuite/gcc.dg/vect/transpose-3.c | 54 + + gcc/testsuite/gcc.dg/vect/transpose-4.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-5.c | 74 ++ + gcc/testsuite/gcc.dg/vect/transpose-6.c | 67 + + gcc/testsuite/gcc.dg/vect/transpose-7.c | 53 + + gcc/testsuite/gcc.dg/vect/transpose-8.c | 53 + + gcc/testsuite/gcc.dg/vect/vect.exp | 7 + + gcc/tree-loop-distribution.cc | 1464 ++++++++++++++++++++- + gcc/tree-vect-data-refs.cc | 237 ++++ + gcc/tree-vect-loop.cc | 42 +- + gcc/tree-vect-patterns.cc | 4 +- + gcc/tree-vect-slp.cc | 1553 ++++++++++++++++++++--- + gcc/tree-vect-stmts.cc | 973 +++++++++++++- + gcc/tree-vectorizer.h | 96 +- + 17 files changed, 4648 insertions(+), 189 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-1.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-2.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-3.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-4.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-5.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-6.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-7.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-8.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..5958c4e0b 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3221,6 +3221,10 @@ ftree-slp-vectorize + Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. + ++ftree-slp-transpose-vectorize ++Common Var(flag_tree_slp_transpose_vectorize) Optimization Init(0) ++Enable basic block vectorization (SLP) for transposed stores and loads on trees. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=unlimited|dynamic|cheap|very-cheap Specifies the cost model for vectorization. 
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c +new file mode 100644 +index 000000000..8237a8b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c @@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 16; ++ int i2 = 8; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 2; ++ input2[i] = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1264) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c +new file mode 100644 +index 000000000..fdf4dbd96 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c @@ -0,0 +1,50 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 8 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned short c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 5; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 4; ++ input2[i] = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1440) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c +new file mode 100644 +index 000000000..e492e3717 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c @@ -0,0 +1,54 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse -fno-tree-fre" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ }
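All of the new transpose-N.c tests follow the shape visible above: a group of strided loads per row, element-wise differences stored row by row, then a reduction over the stored rows. A condensed sketch of that pattern for readers who want to experiment (illustrative only; the flag and the slp1 dump name come from this patch's common.opt hunk and dg-final lines):

```c
/* Condensed form of the transpose-N.c kernels: grouped strided loads
   whose differences are stored row-by-row, then re-read and reduced.
   Sketch only; e.g. gcc -O3 -ftree-slp-transpose-vectorize
   -fdump-tree-slp1 t.c  */
#define N 4

int
sum_diffs (unsigned char *pix1, int stride1, unsigned char *pix2, int stride2)
{
  unsigned c[N][N];
  for (int i = 0; i < N; i++, pix1 += stride1, pix2 += stride2)
    for (int j = 0; j < N; j++)
      c[i][j] = pix1[j] - pix2[j];	/* transposed group of stores */

  int sum = 0;
  for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
      sum += c[i][j];
  return sum;
}
```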
View file
_service:tar_scm:0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
Added
@@ -0,0 +1,80 @@ +From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:29 +0800 +Subject: PATCH 099/188 LoongArch: testsuite:Give up the detection of the + gcc.dg/fma-{3, 4, 6, 7}.c file. + +On the LoongArch architecture, the above four test cases need to be waived +during testing. There are two situations: + +1. The function of fma-{3,6}.c test is to find the value of c-a*b, but on +the LoongArch architecture, the function of the existing fnmsub instruction +is to find the value of -(a*b - c); + +2. The function of fma-{4,7}.c test is to find the value of -(a*b)-c, but on +the LoongArch architecture, the function of the existing fnmadd instruction +is to find the value of -(a*b + c); + +Through the analysis of the above two cases, there will be positive and +negative zero inequality. + +gcc/testsuite/ChangeLog + + * gcc.dg/fma-3.c: The intermediate file corresponding to the + function does not produce the corresponding FNMA symbol, so the test + rules should be skipped when testing. + * gcc.dg/fma-4.c: The intermediate file corresponding to the + function does not produce the corresponding FNMS symbol, so skip the + test rules when testing. + * gcc.dg/fma-6.c: The cause is the same as fma-3.c. + * gcc.dg/fma-7.c: The cause is the same as fma-4.c +--- + gcc/testsuite/gcc.dg/fma-3.c | 2 +- + gcc/testsuite/gcc.dg/fma-4.c | 2 +- + gcc/testsuite/gcc.dg/fma-6.c | 2 +- + gcc/testsuite/gcc.dg/fma-7.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c +index 699aa2c95..6649b54b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-3.c ++++ b/gcc/testsuite/gcc.dg/fma-3.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return c - a * b; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c +index bff928f1f..f1701c196 100644 +--- a/gcc/testsuite/gcc.dg/fma-4.c ++++ b/gcc/testsuite/gcc.dg/fma-4.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return -(a * b) - c; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c +index 87258cec4..9e49b62b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-6.c ++++ b/gcc/testsuite/gcc.dg/fma-6.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, -c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c +index f409cc8ee..86aacad7b 100644 +--- a/gcc/testsuite/gcc.dg/fma-7.c ++++ b/gcc/testsuite/gcc.dg/fma-7.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! 
loongarch*-*-* } } } } } */ +-- +2.43.0 +
View file
_service:tar_scm:0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
Deleted
@@ -1,48 +0,0 @@ -From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Explicitly handle frames with no saved registers - -If a frame has no saved registers, it can be allocated in one go. -There is no need to treat the areas below and above the saved -registers as separate. - -And if we allocate the frame in one go, it should be allocated -as the initial_adjust rather than the final_adjust. This allows the -frame size to grow to guard_size - guard_used_by_caller before a stack -probe is needed. (A frame with no register saves is necessarily a -leaf frame.) - -This is a no-op as thing stand, since a leaf function will have -no outgoing arguments, and so all the frame will be above where -the saved registers normally go. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly - allocate the frame in one go if there are no saved registers. ---- - gcc/config/aarch64/aarch64.cc | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 79253322fd7c..e1f21230c15e 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; -- if (frame.frame_size.is_constant (&const_size) -- && const_size < max_push_offset -- && known_eq (frame.hard_fp_offset, const_size)) -+ if (known_eq (frame.saved_regs_size, 0)) -+ frame.initial_adjust = frame.frame_size; -+ else if (frame.frame_size.is_constant (&const_size) -+ && const_size < max_push_offset -+ && known_eq (frame.hard_fp_offset, const_size)) - { - /* Simple, small frame with no outgoing arguments: - --- -2.43.5 -
View file
_service:tar_scm:0100-Add-hip09-machine-discribtion.patch
Added
@@ -0,0 +1,882 @@ +From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001 +From: xingyushuai <xingyushuai@huawei.com> +Date: Fri, 3 Mar 2023 09:31:04 +0800 +Subject: PATCH 001/157 Add hip09 machine discribtion + +Here is the patch introducing hip09 machine model +for the scheduler. +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 109 +++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++ + 6 files changed, 774 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..a854bdb24 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 48522606f..fc5a3cbe4 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. 
*/ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table ampere1_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 9dc9adc70..238bb6e31 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..e9b3980c4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct 
cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost neoversen2_regmove_cost = + { + 1, /* GP2GP */
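A note on reading the hip09 cost tables above: every entry is expressed in COSTS_N_INSNS units, i.e. relative instruction counts rather than cycles. A toy illustration using the SImode entries shown; the macro body mirrors gcc/rtl.h:

```c
/* COSTS_N_INSNS (n) in GCC's rtl.h expands to n * 4, meaning "n average
   instructions".  With the hip09 SImode entries above, a simple multiply
   costs 2 instructions and an integer divide 11, so the RTL expanders
   will gladly trade a division by a constant for multiply-plus-shift.  */
#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)	/* mirrors gcc/rtl.h */

int
main (void)
{
  printf ("mult=%d idiv=%d\n", COSTS_N_INSNS (2), COSTS_N_INSNS (11));
  return 0;
}
```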
View file
_service:tar_scm:0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
Added
@@ -0,0 +1,206 @@ +From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jan 2024 10:37:53 +0800 +Subject: PATCH 100/188 LoongArch: Fixed the problem of incorrect judgment of + the immediate field of the xvld/xvst instruction. + +The xvld/xvst directive is defined as follows: + xvld/xvst {x/v}d, rj, si12 + +When not modified, the immediate field of xvld/xvst is between 10 and +14 bits depending on the type. However, in loongarch_valid_offset_p, the +immediate field is restricted first, so there is no error. However, in +some cases redundant instructions will be generated, see test cases. +Now modify it according to the description in the instruction manual. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_mxld_<lasxfmt_f>): + Modify the method of determining the memory offset of xvld/xvst. + (lasx_mxst_<lasxfmt_f>): Likewise. + * config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete. + (loongarch_address_insns): Likewise. + * config/loongarch/lsx.md (lsx_ld_<lsxfmt_f>): Likewise. + (lsx_st_<lsxfmt_f>): Likewise. + * config/loongarch/predicates.md (aq10b_operand): Likewise. + (aq10h_operand): Likewise. + (aq10w_operand): Likewise. + (aq10d_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ld-st-imm12.c: New test. +--- + gcc/config/loongarch/lasx.md | 26 ------------------- + gcc/config/loongarch/loongarch.cc | 19 +++----------- + gcc/config/loongarch/lsx.md | 26 ------------------- + gcc/config/loongarch/predicates.md | 16 ------------ + .../gcc.target/loongarch/vect-ld-st-imm12.c | 15 +++++++++++ + 5 files changed, 19 insertions(+), 83 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index dbbf5a136..95c6bae20 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -846,32 +846,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lasx_mxld_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lasx_mxst_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; LASX + (define_insn "add<mode>3" + [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9d2374a46..ddb32cea2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + + /* We may need to split multiword moves, so make sure that every word + is accessible. */ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + +- /* LSX LD.* and ST.* supports 10-bit signed offsets. 
*/ +- if (LSX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, +- loongarch_ldst_scaled_shift (mode))) +- return false; +- +- /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift. */ +- if (LASX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, 0)) +- return false; +- + return true; + } + +@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + case ADDRESS_REG: + if (lsx_p) + { +- /* LSX LD.* and ST.* supports 10-bit signed offsets. */ +- if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10, +- loongarch_ldst_scaled_shift (mode))) ++ /* LSX LD.* and ST.* supports 12-bit signed offsets. */ ++ if (IMM12_OPERAND (INTVAL (addr.offset))) + return 1; + else + return 0; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 3e3248ef4..02e89247b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -812,32 +812,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lsx_ld_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lsx_st_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; Integer operations + (define_insn "add<mode>3" + [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 3698b9103..824a85b36 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -167,22 +167,6 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +-(define_predicate "aq10b_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) +- +-(define_predicate "aq10h_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) +- +-(define_predicate "aq10w_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) +- +-(define_predicate "aq10d_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) +- + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +new file mode 100644 +index 000000000..bfc208e4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */ ++/* { dg-final { scan-assembler-not "addi.d" } } */ ++ ++extern short a[1000]; ++extern short b[1000]; ++extern short c[1000]; ++ ++void ++test (void) ++{
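The point of the new test is that a valid si12 offset no longer forces a separate addi.d. The relaxed check, re-implemented standalone for illustration (GCC's real test is the IMM12_OPERAND macro used in the hunks above):

```c
/* xvld/xvst accept a raw 12-bit signed byte offset, i.e. [-2048, 2047],
   with no scaling by the access size.  The old code applied the 10-bit
   scaled LSX/LASX range here, rejecting valid offsets and emitting an
   extra addi.d to materialize the address.  Illustration only.  */
#include <stdbool.h>
#include <stdio.h>

static bool
si12_offset_ok (long off)
{
  return off >= -2048 && off <= 2047;
}

int
main (void)
{
  printf ("%d %d %d\n", si12_offset_ok (2040), si12_offset_ok (-2048),
	  si12_offset_ok (2048));	/* prints: 1 1 0 */
  return 0;
}
```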
View file
_service:tar_scm:0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
Deleted
@@ -1,233 +0,0 @@ -From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Add bytes_below_saved_regs to frame info - -The frame layout code currently hard-codes the assumption that -the number of bytes below the saved registers is equal to the -size of the outgoing arguments. This patch abstracts that -value into a new field of aarch64_frame. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, - and use it instead of crtl->outgoing_args_size. - (aarch64_get_separate_components): Use bytes_below_saved_regs instead - of outgoing_args_size. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- - gcc/config/aarch64/aarch64.h | 5 +++ - 2 files changed, 41 insertions(+), 35 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e1f21230c15e..94e1b6865849 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); - -+ frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ - /* Now assign stack slots for the registers. Start with the predicate - registers, since predicate LDR and STR have a relatively small - offset range. These saves happen below the hard frame pointer. */ -@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void) - - poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; - -- poly_int64 above_outgoing_args -+ poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - - frame.hard_fp_offset -- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; -+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -- gcc_assert (multiple_p (crtl->outgoing_args_size, -+ gcc_assert (multiple_p (frame.bytes_below_saved_regs, - STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; -+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; - - frame.locals_offset = frame.saved_varargs_size; - -@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; -@@ -8384,31 +8386,31 @@ - && const_size < max_push_offset - && known_eq (frame.hard_fp_offset, const_size)) - { -- /* Simple, small frame with no outgoing arguments: -+ /* Simple, small frame with no data below the saved registers. - - stp reg1, reg2, [sp, -frame_size]! 
- stp reg3, reg4, [sp, 16] */ - frame.callee_adjust = const_size; - } -- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) -+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) - && frame.saved_regs_size.is_constant (&const_saved_regs_size) -- && const_outgoing_args_size + const_saved_regs_size < 512 -- /* We could handle this case even with outgoing args, provided -- that the number of args left us with valid offsets for all -- predicate and vector save slots. It's such a rare case that -- it hardly seems worth the effort though. */ -- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) -+ && const_below_saved_regs + const_saved_regs_size < 512 -+ /* We could handle this case even with data below the saved -+ registers, provided that that data left us with valid offsets -+ for all predicate and vector save slots. It's such a rare -+ case that it hardly seems worth the effort though. */ -+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca - && frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset)) - { -- /* Frame with small outgoing arguments: -+ /* Frame with small area below the saved registers: - - sub sp, sp, frame_size -- stp reg1, reg2, [sp, outgoing_args_size] -- stp reg3, reg4, [sp, outgoing_args_size + 16] */ -+ stp reg1, reg2, [sp, bytes_below_saved_regs] -+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_outgoing_args_size; -+ frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8418,30 +8420,29 @@ - - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = (frame.hard_fp_offset - + frame.below_hard_fp_saved_regs_size); -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset) - { -- /* Frame with large outgoing arguments or SVE saves, but with -- a small local area: -+ /* Frame with large area below the saved registers, or with SVE saves, -+ but with a small area above: - - stp reg1, reg2, [sp, -hard_fp_offset]! 
- stp reg3, reg4, [sp, 16] - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else - { -- /* Frame with large local area and outgoing arguments or SVE saves, -- using frame pointer: -+ /* General case: - - sub sp, sp, hard_fp_offset - stp x29, x30, [sp, 0] -@@ -8449,10 +8450,10 @@ - stp reg3, reg4, [sp, 16] - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.hard_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - - /* Make sure the individual adjustments add up to the full frame size. */ -@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset2 -= frame.below_hard_fp_saved_regs_size; - else -- offset2 += crtl->outgoing_args_size; -+ offset2 += frame.bytes_below_saved_regs; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) - registers. If POLY_SIZE is not large enough to require a probe this function - will only adjust the stack. When allocating the stack space - FRAME_RELATED_P is then used to indicate if the allocation is frame related. -- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing -- arguments. If we are then we ensure that any allocation larger than the ABI -- defined buffer needs a probe so that the invariant of having a 1KB buffer is -- maintained. -+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
View file
_service:tar_scm:0101-Add-hip11-CPU-pipeline-scheduling.patch
Added
@@ -0,0 +1,755 @@ +From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001 +From: XingYuShuai <1150775134@qq.com> +Date: Mon, 26 Feb 2024 20:34:06 +0800 +Subject: PATCH 002/157 Add hip11 CPU pipeline scheduling + +This patch adds an mcpu: hip11. It has been tested on aarch64 +and no regressions from this patch. +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 108 ++++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++ + gcc/doc/invoke.texi | 2 +- + 7 files changed, 634 insertions(+), 2 deletions(-) + create mode 100644 gcc/config/aarch64/hip11.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index a854bdb24..601b72abb 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index fc5a3cbe4..0ee427b61 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs = + } + }; + ++const struct cpu_cost_table hip11_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. 
*/ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 238bb6e31..511422081 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) 
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e9b3980c4..7c62ddb2a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip11_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip11_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + {
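Once aarch64-cores.def and aarch64-tune.md carry the hip11 entry, the core is selected like any other AArch64 CPU. A hypothetical smoke test, not from the patch:

```c
/* After this patch the driver should accept the new core name, and the
   hip11.md pipeline model then drives instruction scheduling:
     gcc -mcpu=hip11 -O2 -S smoke.c
     gcc -mtune=hip11 -O2 -S smoke.c   (tuning only, default ISA)  */
int
smoke (int a, int b)
{
  return a * b + a;	/* candidate for a scheduled madd */
}
```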
View file
_service:tar_scm:0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch
Added
@@ -0,0 +1,150 @@ +From f5355c67104cb5d150e1fd3b58807b2ad4e67b7c Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 5 Jan 2024 15:37:13 +0800 +Subject: PATCH 101/188 LoongArch: Improve lasx_xvpermi_q_<LASX:mode> insn + pattern + +For instruction xvpermi.q, unused bits in operands[3] need be set to 0 to avoid +causing undefined behavior on LA464. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Set the unused bits in operand[3] to 0. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c: Removed. + * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: New test. +--- + gcc/config/loongarch/lasx.md | 9 ++- + .../loongarch/vector/lasx/lasx-xvpermi_q.c | 64 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xvpremi.c | 19 ------ + 3 files changed, 72 insertions(+), 20 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c + delete mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 95c6bae20..b4aa8e261 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -635,6 +635,8 @@ + (set_attr "mode" "<MODE>")) + + ;; xvpermi.q ++;; Unused bits in operands[3] need be set to 0 to avoid ++;; causing undefined behavior on LA464. + (define_insn "lasx_xvpermi_q_<LASX:mode>" + [(set (match_operand:LASX 0 "register_operand" "=f") + (unspec:LASX +@@ -643,7 +645,12 @@ + (match_operand 3 "const_uimm8_operand")] + UNSPEC_LASX_XVPERMI_Q))] + "ISA_HAS_LASX" +- "xvpermi.q\t%u0,%u2,%3" ++{ ++ int mask = 0x33; ++ mask &= INTVAL (operands[3]); ++ operands[3] = GEN_INT (mask); ++ return "xvpermi.q\t%u0,%u2,%3"; ++} + (set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +new file mode 100644 +index 000000000..dbc29d2fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +@@ -0,0 +1,64 @@ ++/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ ++#include "../simd_correctness_check.h" ++#include <lasxintrin.h> ++ ++int ++main () ++{ ++ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; ++ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; ++ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; ++ ++ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; ++ long int long_op0, long_op1, long_op2, lont_out, lont_result; ++ long int long_int_out, long_int_result; ++ unsigned int unsigned_int_out, unsigned_int_result; ++ unsigned long int unsigned_long_int_out, unsigned_long_int_result; ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op0[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[0]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op1[3]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[2]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[1]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[0]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_result[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, 
__m256i_op1, 0x2a); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op0[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[0]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op1[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[2]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_op1[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[0]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[2]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[0]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op1[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_op1[1]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[0]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +deleted file mode 100644 +index e9fc1d7d3..000000000 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c ++++ /dev/null +@@ -1,19 +0,0 @@ +-/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ +-#include "../simd_correctness_check.h" +-#include <lasxintrin.h> +- +-int +-main () +-{ +- __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; +- __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; +- __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; +- +- int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; +- long int long_op0, long_op1, long_op2, lont_out, lont_result; +- long int long_int_out, long_int_result; +- unsigned int unsigned_int_out, unsigned_int_result; +- unsigned long int unsigned_long_int_out, unsigned_long_int_result; +- +- return 0; +-} +-- +2.43.0 +
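On the 0x33 mask chosen above: xvpermi.q's ui8 immediate encodes two 2-bit 128-bit-lane selectors, in bits 1:0 and 5:4, and it is the remaining bits that trigger the LA464 problem when left set. The sanitisation is easy to model (illustrative C, not from the patch):

```c
/* Mirrors "mask &= INTVAL (operands[3])" with mask = 0x33 in the insn
   above: keep the two 2-bit lane selectors (bits 1:0 and 5:4), clear
   the bits whose contents are undefined on LA464.  */
#include <stdio.h>

static unsigned
sanitize_xvpermi_q_imm (unsigned imm8)
{
  return imm8 & 0x33;
}

int
main (void)
{
  /* The 0xb9 used by the new test becomes 0x31: the selected lanes are
     unchanged, only the undefined bits are dropped.  */
  printf ("0x%02x\n", sanitize_xvpermi_q_imm (0xb9));
  return 0;
}
```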
View file
_service:tar_scm:0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
Deleted
@@ -1,84 +0,0 @@ -From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Add bytes_below_hard_fp to frame info - -Following on from the previous bytes_below_saved_regs patch, this one -records the number of bytes that are below the hard frame pointer. -This eventually replaces below_hard_fp_saved_regs_size. - -If a frame pointer is not needed, the epilogue adds final_adjust -to the stack pointer before restoring registers: - - aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); - -Therefore, if the epilogue needs to restore the stack pointer from -the hard frame pointer, the directly corresponding offset is: - - -bytes_below_hard_fp + final_adjust - -i.e. go from the hard frame pointer to the bottom of the frame, -then add the same amount as if we were using the stack pointer -from the outset. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. - (aarch64_expand_epilogue): Use it instead of - below_hard_fp_saved_regs_size. ---- - gcc/config/aarch64/aarch64.cc | 6 +++--- - gcc/config/aarch64/aarch64.h | 5 +++++ - 2 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 94e1b6865849..c7d84245fbfc 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void) - of the callee save area. */ - bool saves_below_hard_fp_p = maybe_ne (offset, 0); - frame.below_hard_fp_saved_regs_size = offset; -+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 final_adjust = frame.final_adjust; - poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; -- poly_int64 below_hard_fp_saved_regs_size -- = frame.below_hard_fp_saved_regs_size; -+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; - unsigned reg2 = frame.wb_pop_candidate2; - unsigned int last_gpr = (frame.is_scs_enabled -@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall) - is restored on the instruction doing the writeback. */ - aarch64_add_offset (Pmode, stack_pointer_rtx, - hard_frame_pointer_rtx, -- -callee_offset - below_hard_fp_saved_regs_size, -+ -bytes_below_hard_fp + final_adjust, - tmp1_rtx, tmp0_rtx, callee_adjust == 0); - else - /* The case where we need to re-use the register here is very rare, so -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 1e105e12db8d..de68ff7202fc 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame - are saved below the hard frame pointer. */ - poly_int64 below_hard_fp_saved_regs_size; - -+ /* The number of bytes between the bottom of the static frame (the bottom -+ of the outgoing arguments) and the hard frame pointer. This value is -+ always a multiple of STACK_BOUNDARY. */ -+ poly_int64 bytes_below_hard_fp; -+ - /* Offset from the base of the frame (incomming SP) to the - top of the locals area. This value is always a multiple of - STACK_BOUNDARY. */ --- -2.43.5 -
View file
_service:tar_scm:0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
Added
@@ -0,0 +1,2164 @@ +From 8fa9788ac64a9ea5dc92c61c8f2ec11075cd17ec Mon Sep 17 00:00:00 2001 +From: XingYushuai <xingyushuai@huawei.com> +Date: Thu, 15 Dec 2022 14:34:16 +0800 +Subject: PATCH 003/157 Add Crc32 Optimization in Gzip For crc32 algorithm in + APBC int_gzip. + +Match crc32 lookup table algorithm. An example for crc32 lookup table +elg: ```c do { c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); } while (--n); + +Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c` +Node: The cpu you use needs to support the crc32 instructions +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 4 + + gcc/config/aarch64/aarch64-builtins.cc | 30 + + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.cc | 12 + + gcc/doc/invoke.texi | 6 +- + gcc/doc/tm.texi | 9 + + gcc/doc/tm.texi.in | 2 + + gcc/match.pd | 23 + + gcc/passes.def | 1 + + gcc/target.def | 14 + + .../tree-ssa/loop-crc-loop-condition-fail.c | 85 ++ + .../tree-ssa/loop-crc-loop-form-fail-2.c | 90 ++ + .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 112 ++ + .../gcc.dg/tree-ssa/loop-crc-sucess.c | 83 + + .../tree-ssa/loop-crc-table-check-fail.c | 114 ++ + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-crc.cc | 1333 +++++++++++++++++ + 19 files changed, 1921 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c + create mode 100644 gcc/tree-ssa-loop-crc.cc + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 5cd838270..2b9f025dc 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1649,6 +1649,7 @@ OBJS = \ + tree-ssa-ifcombine.o \ + tree-ssa-live.o \ + tree-ssa-loop-ch.o \ ++ tree-ssa-loop-crc.o \ + tree-ssa-loop-im.o \ + tree-ssa-loop-ivcanon.o \ + tree-ssa-loop-ivopts.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index b18f0b944..42fb2fc19 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1119,6 +1119,10 @@ fcrypto-accel-aes + Common Var(flag_crypto_accel_aes) Init(0) Optimization + Perform crypto acceleration AES pattern matching. + ++floop-crc ++Common Var(flag_loop_crc) Optimization ++Do the loop crc conversion. ++ + fauto-inc-dec + Common Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. 
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 42276e7ca..3b952ef39 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -551,6 +551,12 @@ typedef struct + #define VAR1(T, N, MAP, FLAG, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + ++enum aarch64_crc_builtins{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ + enum aarch64_builtins + { + AARCH64_BUILTIN_MIN, +@@ -1812,6 +1818,30 @@ aarch64_general_builtin_decl (unsigned code, bool) + return aarch64_builtin_decls[code]; + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE */ ++unsigned ++get_crc_builtin_code(unsigned code, bool) ++{ ++ if (code > AARCH64_BUILTIN_CRC32W) ++ return AARCH64_BUILTIN_MIN; ++ ++ unsigned res = AARCH64_BUILTIN_MIN; ++ switch (code) { ++ case AARCH64_BUILTIN_CRC32B: ++ res = AARCH64_BUILTIN_crc32b; ++ break; ++ case AARCH64_BUILTIN_CRC32H: ++ res = AARCH64_BUILTIN_crc32h; ++ break; ++ case AARCH64_BUILTIN_CRC32W: ++ res = AARCH64_BUILTIN_crc32w; ++ break; ++ default: ++ break; ++ } ++ return res; ++} ++ + typedef enum + { + SIMD_ARG_COPY_TO_REG, +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 475d174dd..853197ee9 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -994,6 +994,7 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, + gimple_stmt_iterator *); + rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); + tree aarch64_general_builtin_decl (unsigned, bool); ++unsigned get_crc_builtin_code(unsigned , bool); + tree aarch64_general_builtin_rsqrt (unsigned int); + tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); + void handle_arm_acle_h (void); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..280e0b618 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -15210,6 +15210,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p) + gcc_unreachable (); + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE. */ ++static unsigned ++aarch64_get_crc_builtin_code(unsigned code, bool initialize_p) ++{ ++ unsigned subcode = get_crc_builtin_code(code,initialize_p); ++ unsigned res = subcode << AARCH64_BUILTIN_SHIFT; ++ return res; ++} ++ + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. */ + +@@ -27677,6 +27686,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL aarch64_builtin_decl + ++#undef TARGET_GET_CRC_BUILTIN_CODE ++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code ++ + #undef TARGET_BUILTIN_RECIPROCAL + #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7ca60dd64..c3ce148b0 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -537,7 +537,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol + -fivopts -fkeep-inline-functions -fkeep-static-functions @gol + -fkeep-static-consts -flimit-function-alignment -flive-range-shrinkage @gol +--floop-block -floop-interchange -floop-strip-mine @gol ++-floop-block -floop-crc -floop-interchange -floop-strip-mine @gol + -floop-unroll-and-jam -floop-nest-optimize @gol + -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol + -flto-partition=@var{alg} -fmerge-all-constants @gol +@@ -12159,6 +12159,10 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations + are also performed by the code generator isl, like index splitting and + dead code elimination in loops. + ++@item -floop-crc ++@opindex floop-crc ++Do the loop crc conversion ++ + @item -floop-nest-optimize + @opindex floop-nest-optimize + Enable the isl based loop nest optimizer. This is a generic loop nest +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 851d31c18..5a1e0fe43 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11658,6 +11658,15 @@ If @var{code} is out of range the function should return + @code{error_mark_node}. + @end deftypefn + ++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) ++Define this hook to get crc32 builtin code. It should be a function that ++returns the crc32 builtin function code @var{code}. ++If there is no such builtin and it cannot be initialized at this time ++if @var{initialize_p} is true the function should return @code{NULL_TREE}. ++If @var{code} is out of range the function should return ++@code{error_mark_node}. ++@end deftypefn ++ + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) +
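Since the inline example in the commit message above is mangled by this view, here is a self-contained sketch of the gzip-style table-lookup loop that the new pass (gcc/tree-ssa-loop-crc.cc in the diffstat) is meant to recognize; the function name is a placeholder and the table contents are elided:

```c
/* Gzip-style CRC32 lookup-table loop of the shape -floop-crc targets.
   crc_32_tab stands for the usual 256-entry CRC table.
   Build with: gcc -O3 -march=armv8.1-a -floop-crc file.c  */
extern const unsigned long crc_32_tab[256];

unsigned long
updcrc (unsigned long c, const unsigned char *s, unsigned n)
{
  do
    c = crc_32_tab[((int) c ^ (*s++)) & 0xff] ^ (c >> 8);
  while (--n);
  return c;
}
```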
View file
_service:tar_scm:0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch
Added
@@ -0,0 +1,253 @@ +From a321a294407781b2694fe9a3be0099fe38ccf13a Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 5 Jan 2024 15:38:25 +0800 +Subject: PATCH 102/188 LoongArch: Implement vec_init<M><N> where N is a LSX + vector mode + +This patch implements more vec_init optabs that can handle two LSX vectors producing a LASX +vector by concatenating them. When an lsx vector is concatenated with an LSX const_vector of +zeroes, the vec_concatz pattern can be used effectively. For example as below + +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +v8hi a, b; + +v16hi vec_initv16hiv8hi () +{ + return __builtin_shufflevector (a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); +} + +Before this patch: + +vec_initv16hiv8hi: + addi.d $r3,$r3,-64 + .cfi_def_cfa_offset 64 + xvrepli.h $xr0,0 + la.local $r12,.LANCHOR0 + xvst $xr0,$r3,0 + xvst $xr0,$r3,32 + vld $vr0,$r12,0 + vst $vr0,$r3,0 + vld $vr0,$r12,16 + vst $vr0,$r3,32 + xvld $xr1,$r3,32 + xvld $xr2,$r3,32 + xvld $xr0,$r3,0 + xvilvh.h $xr0,$xr1,$xr0 + xvld $xr1,$r3,0 + xvilvl.h $xr1,$xr2,$xr1 + addi.d $r3,$r3,64 + .cfi_def_cfa_offset 0 + xvpermi.q $xr0,$xr1,32 + jr $r1 + +After this patch: + +vec_initv16hiv8hi: + la.local $r12,.LANCHOR0 + vld $vr0,$r12,32 + vld $vr2,$r12,48 + xvilvh.h $xr1,$xr2,$xr0 + xvilvl.h $xr0,$xr2,$xr0 + xvpermi.q $xr1,$xr0,32 + xvst $xr1,$r4,0 + jr $r1 + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_initv32qiv16qi): Rename to .. + (vec_init<mode><lasxhalf>): .. this, and extend to mode. + (@vec_concatz<mode>): New insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): + Handle VALS containing two vectors. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c: New test. +--- + gcc/config/loongarch/lasx.md | 26 +++++++- + gcc/config/loongarch/loongarch.cc | 44 +++++++++++-- + .../loongarch/vector/lasx/lasx-vec-init-2.c | 65 +++++++++++++++++++ + 3 files changed, 128 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b4aa8e261..803c5dd93 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -465,6 +465,11 @@ + (V16HI "w") + (V32QI "w")) + ++;; Half modes of all LASX vector modes, in lower-case. 
++(define_mode_attr lasxhalf [(V32QI "v16qi") (V16HI "v8hi")
++ (V8SI "v4si") (V4DI "v2di")
++ (V8SF "v4sf") (V4DF "v2df")])
++
+ (define_expand "vec_init<mode><unitmode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "")]
+@@ -474,9 +479,9 @@
+ DONE;
+ })
+
+-(define_expand "vec_initv32qiv16qi"
+- [(match_operand:V32QI 0 "register_operand")
+- (match_operand:V16QI 1 "")]
++(define_expand "vec_init<mode><lasxhalf>"
++ [(match_operand:LASX 0 "register_operand")
++ (match_operand:<VHMODE256_ALL> 1 "")]
+ "ISA_HAS_LASX"
+ {
+ loongarch_expand_vector_group_init (operands[0], operands[1]);
+@@ -577,6 +582,21 @@
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
++(define_insn "@vec_concatz<mode>"
++ [(set (match_operand:LASX 0 "register_operand" "=f")
++ (vec_concat:LASX
++ (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand")
++ (match_operand:<VHMODE256_ALL> 2 "const_0_operand")))]
++ "ISA_HAS_LASX"
++{
++ if (MEM_P (operands[1]))
++ return "vld\t%w0,%1";
++ else
++ return "vori.b\t%w0,%w1,0";
++}
++ [(set_attr "type" "simd_splat")
++ (set_attr "mode" "<MODE>")])
++
+ (define_insn "vec_concat<mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_concat:LASX
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index ddb32cea2..fccdc21a8 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -9842,10 +9842,46 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
+ void
+ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ {
+- rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
+- force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
+- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+- ops[1])));
++ machine_mode vmode = GET_MODE (target);
++ machine_mode half_mode = VOIDmode;
++ rtx low = XVECEXP (vals, 0, 0);
++ rtx high = XVECEXP (vals, 0, 1);
++
++ switch (vmode)
++ {
++ case E_V32QImode:
++ half_mode = V16QImode;
++ break;
++ case E_V16HImode:
++ half_mode = V8HImode;
++ break;
++ case E_V8SImode:
++ half_mode = V4SImode;
++ break;
++ case E_V4DImode:
++ half_mode = V2DImode;
++ break;
++ case E_V8SFmode:
++ half_mode = V4SFmode;
++ break;
++ case E_V4DFmode:
++ half_mode = V2DFmode;
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ if (high == CONST0_RTX (half_mode))
++ emit_insn (gen_vec_concatz (vmode, target, low, high));
++ else
++ {
++ if (!register_operand (low, half_mode))
++ low = force_reg (half_mode, low);
++ if (!register_operand (high, half_mode))
++ high = force_reg (half_mode, high);
++ emit_insn (gen_rtx_SET (target,
++ gen_rtx_VEC_CONCAT (vmode, low, high)));
++ }
+ }
+
+ /* Expand initialization of a vector which has all same elements.
*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +new file mode 100644 +index 000000000..7592198c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-vect-cost-model -mlasx" } */ ++/* { dg-final { scan-assembler-times "vld" 12 } } */ ++ ++ ++typedef char v16qi __attribute__ ((vector_size (16))); ++typedef char v32qi __attribute__ ((vector_size (32))); ++ ++typedef short v8hi __attribute__ ((vector_size (16))); ++typedef short v16hi __attribute__ ((vector_size (32))); ++ ++typedef int v4si __attribute__ ((vector_size (16))); ++typedef int v8si __attribute__ ((vector_size (32))); ++
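To make the new @vec_concatz<mode> path concrete: per the commit message, it fires when the high half of the 256-bit initializer is an LSX const_vector of zeroes, so the expander can emit a single vld/vori.b instead of a real two-register concatenation. A hypothetical source-level trigger (not one of the patch's tests), compiled with -O3 -mlasx:

```c
/* Hypothetical trigger for the vec_concatz path added above: the high
   128-bit half of the result is all zeros.  */
typedef short v8hi __attribute__ ((vector_size (16)));
typedef short v16hi __attribute__ ((vector_size (32)));

v16hi
widen_low (v8hi a)
{
  v8hi zero = { 0 };  /* remaining elements are implicitly zero */
  /* Indices 0-7 take the low half from A, 8-15 take it from ZERO.  */
  return __builtin_shufflevector (a, zero, 0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
}
```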
View file
_service:tar_scm:0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
Deleted
@@ -1,225 +0,0 @@ -From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Tweak aarch64_save/restore_callee_saves - -aarch64_save_callee_saves and aarch64_restore_callee_saves took -a parameter called start_offset that gives the offset of the -bottom of the saved register area from the current stack pointer. -However, it's more convenient for later patches if we use the -bottom of the entire frame as the reference point, rather than -the bottom of the saved registers. - -Doing that removes the need for the callee_offset field. -Other than that, this is not a win on its own. It only really -makes sense in combination with the follow-on patches. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove - callee_offset handling. - (aarch64_save_callee_saves): Replace the start_offset parameter - with a bytes_below_sp parameter. - (aarch64_restore_callee_saves): Likewise. - (aarch64_expand_prologue): Update accordingly. - (aarch64_expand_epilogue): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ - gcc/config/aarch64/aarch64.h | 4 --- - 2 files changed, 28 insertions(+), 32 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index c7d84245fbfc..e79551af41df 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void) - frame.final_adjust = 0; - frame.callee_adjust = 0; - frame.sve_callee_adjust = 0; -- frame.callee_offset = 0; - - frame.wb_pop_candidate1 = frame.wb_push_candidate1; - frame.wb_pop_candidate2 = frame.wb_push_candidate2; -@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void) - stp reg1, reg2, sp, bytes_below_saved_regs - stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, - } - - /* Emit code to save the callee-saved registers from register number START -- to LIMIT to the stack at the location starting at offset START_OFFSET, -- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P -- is true if the hard frame pointer has been set up. */ -+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP -+ bytes above the bottom of the static frame. Skip any write-back -+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard -+ frame pointer has been set up. 
 */
-
- static void
--aarch64_save_callee_saves (poly_int64 start_offset,
-+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
- unsigned start, unsigned limit, bool skip_wb,
- bool hard_fp_valid_p)
- {
-@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- poly_int64 sp_offset = offset;
-
-@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- else if (GP_REGNUM_P (regno)
- && (!offset.is_constant (&const_offset) || const_offset >= 512))
- {
-- gcc_assert (known_eq (start_offset, 0));
-- poly_int64 fp_offset
-- = frame.below_hard_fp_saved_regs_size;
-+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
- if (hard_fp_valid_p)
- base_rtx = hard_frame_pointer_rtx;
- else
-@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- }
-
- /* Emit code to restore the callee registers from register number START
-- up to and including LIMIT. Restore from the stack offset START_OFFSET,
-- skipping any write-back candidates if SKIP_WB is true. Write the
-- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
-+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
-+ bytes above the bottom of the static frame. Skip any write-back
-+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
-+ notes into CFI_OPS. */
-
- static void
--aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
- unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
- aarch64_frame &frame = cfun->machine->frame;
-@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void)
- HOST_WIDE_INT callee_adjust = frame.callee_adjust;
- poly_int64 final_adjust = frame.final_adjust;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-- poly_int64 below_hard_fp_saved_regs_size
-- = frame.below_hard_fp_saved_regs_size;
- unsigned reg1 = frame.wb_push_candidate1;
- unsigned reg2 = frame.wb_push_candidate2;
- bool emit_frame_chain = frame.emit_frame_chain;
-@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void)
- - frame.hard_fp_offset);
- gcc_assert (known_ge (chain_offset, 0));
-
-- /* The offset of the bottom of the save area from the current SP. */
-- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
-+ /* The offset of the current SP from the bottom of the static frame.
*/ -+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void) - { - reg1 = R29_REGNUM; - reg2 = R30_REGNUM; -- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, -+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, - false, false); - } - else -@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void) - emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - -- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - if (maybe_ne (sve_callee_adjust, 0)) -@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void) - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, - sve_callee_adjust, - !frame_pointer_needed, false); -- saved_regs_offset += sve_callee_adjust; -+ bytes_below_sp -= sve_callee_adjust; - } -- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, - false, emit_frame_chain); -- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - - /* We may need to probe the final adjustment if it is larger than the guard - that is assumed by the called. */ -+ gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); - } -@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 initial_adjust = frame.initial_adjust; - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; -- poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; - poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; -@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall) - - /* Restore the vector registers before the predicate registers, - so that we can use P4 as a temporary for big-endian SVE frames. */ -- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, - callee_adjust != 0, &cfi_ops); -- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, - false, &cfi_ops); - if (maybe_ne (sve_callee_adjust, 0)) - aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall)
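A toy recomputation (made-up sizes) of the re-based save-slot addressing this deleted patch introduced, showing that measuring from the bottom of the static frame gives the same SP-relative offset as the old start_offset scheme:

```c
/* Mirrors offset = frame.reg_offset[regno]
   + frame.bytes_below_saved_regs - bytes_below_sp from the hunks above;
   all sizes are invented.  */
#include <assert.h>

int main (void)
{
  long reg_offset = 8;               /* slot within the save area        */
  long bytes_below_saved_regs = 48;  /* frame below the first save slot  */
  long bytes_below_sp = 48;          /* SP currently at the frame bottom */
  long start_offset = bytes_below_saved_regs - bytes_below_sp; /* old ref */

  long new_offset = reg_offset + bytes_below_saved_regs - bytes_below_sp;
  long old_offset = start_offset + reg_offset;
  assert (new_offset == old_offset);
  return 0;
}
```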
View file
_service:tar_scm:0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch
Added
@@ -0,0 +1,533 @@ +From 901663758281d4ce87a75e4d6e45de621b65f0cb Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:07 +0800 +Subject: PATCH 103/188 LoongArch: Handle ISA evolution switches along with + other options + +gcc/ChangeLog: + + * config/loongarch/genopts/genstr.sh: Prepend the isa_evolution + variable with the common la_ prefix. + * config/loongarch/genopts/loongarch.opt.in: Mark ISA evolution + flags as saved using TargetVariable. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-def.h: Define evolution_set to + mark changes to the -march default. + * config/loongarch/loongarch-driver.cc: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Define and use ISA evolution + conditions around the la_target structure. + * config/loongarch/loongarch.cc: Same. + * config/loongarch/loongarch.md: Same. + * config/loongarch/loongarch-builtins.cc: Same. + * config/loongarch/loongarch-c.cc: Same. + * config/loongarch/lasx.md: Same. + * config/loongarch/lsx.md: Same. + * config/loongarch/sync.md: Same. +--- + gcc/config/loongarch/genopts/genstr.sh | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 6 ++--- + gcc/config/loongarch/lasx.md | 4 ++-- + gcc/config/loongarch/loongarch-builtins.cc | 6 ++--- + gcc/config/loongarch/loongarch-c.cc | 2 +- + gcc/config/loongarch/loongarch-def.h | 5 +++- + gcc/config/loongarch/loongarch-driver.cc | 5 ++-- + gcc/config/loongarch/loongarch-opts.cc | 17 ++++++++++++- + gcc/config/loongarch/loongarch-opts.h | 24 +++++++++++++++---- + gcc/config/loongarch/loongarch.cc | 24 ++++++++----------- + gcc/config/loongarch/loongarch.md | 12 +++++----- + gcc/config/loongarch/loongarch.opt | 16 ++++++------- + gcc/config/loongarch/lsx.md | 4 ++-- + gcc/config/loongarch/sync.md | 22 ++++++++--------- + 14 files changed, 90 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index bcc616e98..391eca121 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -107,7 +107,7 @@ EOF + print("") + print("m"$3) + gsub(/-/, "_", $3) +- print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") + $1=""; $2=""; $3="" + sub(/^ */, "", $0) + print($0) +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 102202b03..a866dab84 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -259,6 +259,6 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. +-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. 
++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 803c5dd93..fdfd65e4a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1540,7 +1540,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRECIPE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +@@ -1573,7 +1573,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRSQRTE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 85849ed29..e3b4dbc52 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,9 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) +-AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +-AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) ++AVAIL_ALL (frecipe, ISA_HAS_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && ISA_HAS_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index a89477a74..df2a482ad 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,7 +102,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ if (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE) + builtin_define ("__loongarch_frecipe"); + + if (ISA_HAS_LSX) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index f8f36f0e2..9e5eee0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -132,8 +132,11 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ int64_t evolution_set; + +- loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa () : ++ base (0), fpu (0), simd (0), evolution (0), evolution_set (0) ++ {} + loongarch_isa base_ (int _base) { base = _base; return *this; } + loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } + loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b3626984d..b84a6eaf7 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -42,9 +42,10 @@ extern struct obstack opts_obstack; + const char* + la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + { +- /* Initialize all fields of la_target to -1 */ ++ /* Initialize all fields of la_target. 
*/ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index d31becc67..935d09f45 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -140,7 +140,9 @@ static int with_default_simd = 0; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel) ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolution, ++ HOST_WIDE_INT isa_evolution_set) + { + if (!target) + return; +@@ -148,6 +150,8 @@ loongarch_init_target (struct loongarch_target *target, + target->cpu_tune = cpu_tune; + target->isa.fpu = fpu; + target->isa.simd = simd; ++ target->isa.evolution = isa_evolution; ++ target->isa.evolution_set = isa_evolution_set; + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; +@@ -184,6 +188,9 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + ++ int64_t isa_evolution = target->isa.evolution; ++ int64_t isa_evolution_set = target->isa.evolution_set; ++ + /* 1. Target ABI */ + if (constrained.abi_base) + t.abi.base = target->abi.base; +@@ -394,6 +401,13 @@ config_target_isa: + } + } + ++ /* Apply the ISA evolution feature switches from the user. */ ++ HOST_WIDE_INT isa_evolution_orig = t.isa.evolution; ++ t.isa.evolution &= ~(~isa_evolution & isa_evolution_set); ++ t.isa.evolution |= isa_evolution & isa_evolution_set; ++
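The two-line mask update in the hunk above is the core of the change: bits the user explicitly set or cleared override the -march default, and every other evolution bit keeps its default. A toy model with made-up flag values (OPTION_MASK_ISA_FRECIPE is assumed here for illustration; only OPTION_MASK_ISA_DIV32 appears verbatim in this series):

```c
/* Toy model of applying user ISA-evolution switches over a default.  */
#include <assert.h>

#define OPTION_MASK_ISA_FRECIPE 0x1  /* assumed flag value */
#define OPTION_MASK_ISA_DIV32   0x2  /* assumed flag value */

int main (void)
{
  long evolution = OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_DIV32;
  long user_value = 0;                        /* user passed -mno-frecipe  */
  long user_set   = OPTION_MASK_ISA_FRECIPE;  /* only this bit was touched */

  evolution &= ~(~user_value & user_set);  /* clear bits the user disabled */
  evolution |= user_value & user_set;      /* set bits the user enabled    */

  assert (evolution == OPTION_MASK_ISA_DIV32);
  return 0;
}
```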
View file
_service:tar_scm:0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
Added
@@ -0,0 +1,34 @@ +From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Fri, 23 Aug 2024 15:14:04 +0800 +Subject: PATCH 004/157 SME Remove hip09 and hip11 in aarch64-cores.def to + backport SME + +Will apply it in the end. +--- + gcc/config/aarch64/aarch64-cores.def | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 601b72abb..70b11eb80 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) +-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 +
View file
_service:tar_scm:0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
Deleted
@@ -1,44 +0,0 @@ -From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:51 +0100 -Subject: PATCH aarch64: Only calculate chain_offset if there is a chain - -After previous patches, it is no longer necessary to calculate -a chain_offset in cases where there is no chain record. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the - calculation of chain_offset into the emit_frame_chain block. ---- - gcc/config/aarch64/aarch64.cc | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e79551af41df..d71a042d6112 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void) - if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); - -- /* The offset of the frame chain record (if any) from the current SP. */ -- poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -- gcc_assert (known_ge (chain_offset, 0)); -- - /* The offset of the current SP from the bottom of the static frame. */ - poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -+ /* The offset of the frame chain record (if any) from the current SP. */ -+ poly_int64 chain_offset = (initial_adjust + callee_adjust -+ - frame.hard_fp_offset); -+ gcc_assert (known_ge (chain_offset, 0)); -+ - if (callee_adjust == 0) - { - reg1 = R29_REGNUM; --- -2.43.5 -
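For reference, the moved computation with toy numbers, checking the invariant that the retained gcc_assert enforces:

```c
/* Toy numbers for chain_offset = initial_adjust + callee_adjust
   - frame.hard_fp_offset, now computed only when emit_frame_chain.  */
#include <assert.h>

int main (void)
{
  long initial_adjust = 96, callee_adjust = 32, hard_fp_offset = 112;
  long chain_offset = initial_adjust + callee_adjust - hard_fp_offset;
  assert (chain_offset >= 0);  /* mirrors gcc_assert (known_ge (...)) */
  return 0;
}
```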
View file
_service:tar_scm:0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
Added
@@ -0,0 +1,336 @@ +From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 18 May 2022 16:02:12 +0100 +Subject: PATCH 005/157 BackportSME AArch64: Cleanup CPU option + processing code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f + +The --with-cpu/--with-arch configure option processing not only checks valid +arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask. +This isn't used however since a --with-cpu is translated into a -mcpu option +which is processed as if written on the command-line (so TARGET_CPU_DEFAULT +is never accessed). + +So remove all the complex processing and bitmask, and just validate the +option. Fix a bug that always reports valid architecture extensions as invalid. +As a result the CPU processing in aarch64.c can be simplified. + +gcc/ + * config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch + processing. Add support for architectural extensions. + * config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove + AARCH64_CPU_DEFAULT_FLAGS. + (TARGET_CPU_NBITS): Remove. + (TARGET_CPU_MASK): Remove. + * config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define. + (get_tune_cpu): Assert CPU is always valid. + (get_arch): Assert architecture is always valid. + (aarch64_override_options): Cleanup CPU selection code and simplify logic. + (aarch64_option_restore): Remove unnecessary checks on tune. +--- + gcc/config.gcc | 43 +------------ + gcc/config/aarch64/aarch64.cc | 115 +++++++++------------------------- + gcc/config/aarch64/aarch64.h | 9 +-- + 3 files changed, 32 insertions(+), 135 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 8fdde1576..3be450471 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4190,8 +4190,6 @@ case "${target}" in + pattern=AARCH64_CORE + fi + +- ext_mask=AARCH64_CPU_DEFAULT_FLAGS +- + # Find the base CPU or ARCH id in aarch64-cores.def or + # aarch64-arches.def + if x"$base_val" = x \ +@@ -4199,23 +4197,6 @@ case "${target}" in + ${srcdir}/config/aarch64/$def \ + > /dev/null; then + +- if $which = arch ; then +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- # Extract the architecture flags from aarch64-arches.def +- ext_mask=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/)$//' | \ +- sed -e 's/^.*,//'` +- else +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- fi +- + # Disallow extensions in --with-tune=cortex-a53+crc. + if $which = tune && x"$ext_val" != x ; then + echo "Architecture extensions not supported in --with-$which=$val" 1>&2 +@@ -4246,25 +4227,7 @@ case "${target}" in + grep "^\"$base_ext\""` + + if x"$base_ext" = x \ +- || -n $opt_line ; then +- +- # These regexp extract the elements based on +- # their group match index in the regexp. 
+- ext_canon=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\2/"`
+- ext_on=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\3/"`
+- ext_off=`echo -e "$opt_line" | \
+- sed -e "s/$sed_patt/\4/"`
+-
+- if [ $ext = $base_ext ]; then
+- # Adding extension
+- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")"
+- else
+- # Removing extension
+- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")"
+- fi
+-
++ || [ x"$opt_line" != x ]; then
+ true
+ else
+ echo "Unknown extension used in --with-$which=$val" 1>&2
+@@ -4273,10 +4236,6 @@ case "${target}" in
+ ext_val=`echo $ext_val | sed -e 's/[a-z0-9]\+//'`
+ done
+
+- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)"
+- if [ x"$base_id" != x ]; then
+- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask"
+- fi
+ true
+ else
+ # Allow --with-$which=native.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 7c62ddb2a..ba888beb0 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table =
+ { NULL, 0, 0, false, false, false, false, NULL, NULL }
+ };
+
+-#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
+-
+ /* An ISA extension in the co-processor and main instruction set space. */
+ struct aarch64_option_extension
+ {
+@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
+ return false;
+ }
+
+-static_assert (TARGET_CPU_generic < TARGET_CPU_MASK,
+- "TARGET_CPU_NBITS is big enough");
+-
+-/* Return the CPU corresponding to the enum CPU.
+- If it doesn't specify a cpu, return the default. */
++/* Return the CPU corresponding to the enum CPU. */
+
+ static const struct processor *
+ aarch64_get_tune_cpu (enum aarch64_processor cpu)
+ {
+- if (cpu != aarch64_none)
+- return &all_cores[cpu];
++ gcc_assert (cpu != aarch64_none);
+
+- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that
+- encode the default cpu as selected by the --with-cpu GCC configure option
+- in config.gcc.
+- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
+- flags mechanism should be reworked to make it more sane. */
+- return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
++ return &all_cores[cpu];
+ }
+
+-/* Return the architecture corresponding to the enum ARCH.
+- If it doesn't specify a valid architecture, return the default. */
++/* Return the architecture corresponding to the enum ARCH. */
+
+ static const struct processor *
+ aarch64_get_arch (enum aarch64_arch arch)
+ {
+- if (arch != aarch64_no_arch)
+- return &all_architectures[arch];
+-
+- const struct processor *cpu
+- = &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
++ gcc_assert (arch != aarch64_no_arch);
+
+- return &all_architectures[cpu->arch];
++ return &all_architectures[arch];
+ }
+
+ /* Return the VG value associated with -msve-vector-bits= value VALUE. */
+@@ -18481,10 +18464,6 @@ aarch64_override_options (void)
+ uint64_t arch_isa = 0;
+ aarch64_isa_flags = 0;
+
+- bool valid_cpu = true;
+- bool valid_tune = true;
+- bool valid_arch = true;
+-
+ selected_cpu = NULL;
+ selected_arch = NULL;
+ selected_tune = NULL;
+@@ -18499,77 +18478,56 @@ aarch64_override_options (void)
+ If either of -march or -mtune is given, they override their
+ respective component of -mcpu.
*/ + if (aarch64_cpu_string) +- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, +- &cpu_isa); ++ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); + + if (aarch64_arch_string) +- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch, +- &arch_isa); ++ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); + + if (aarch64_tune_string) +- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune); ++ aarch64_validate_mtune (aarch64_tune_string, &selected_tune); + + #ifdef SUBTARGET_OVERRIDE_OPTIONS
View file
_service:tar_scm:0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch
Added
@@ -0,0 +1,220 @@ +From 282b0847a86fab49fb3582371647fa4cb2d941ed Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:08 +0800 +Subject: PATCH 104/188 LoongArch: Rename ISA_BASE_LA64V100 to ISA_BASE_LA64 + +LoongArch ISA manual v1.10 suggests that software should not depend on +the ISA version number for marking processor features. The ISA version +number is now defined as a collective name of individual ISA evolutions. +Since there is a independent ISA evolution mask now, we can drop the +version information from the base ISA. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Rename. + * config/loongarch/genopts/loongarch.opt.in: Same. + * config/loongarch/loongarch-cpu.cc: Same. + * config/loongarch/loongarch-def.cc: Same. + * config/loongarch/loongarch-def.h: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Same. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/loongarch.opt: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 2 +- + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++------- + gcc/config/loongarch/loongarch-def.h | 6 +++--- + gcc/config/loongarch/loongarch-opts.cc | 10 +++++----- + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch-str.h | 2 +- + gcc/config/loongarch/loongarch.opt | 2 +- + 9 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 411ad5696..ce70b8b9c 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -29,7 +29,7 @@ STR_CPU_LA464 la464 + STR_CPU_LA664 la664 + + # Base architecture +-STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64 la64 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a866dab84..851d8d1f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -33,7 +33,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(@@STR_ISA_BASE_LA64@@) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7e0625835..551d4f72c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -133,7 +133,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + switch (cpucfg_cache1 & 0x3) + { + case 0x02: +- tmp = ISA_BASE_LA64V100; ++ tmp = ISA_BASE_LA64; + break; + + default: +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 843be78e4..533dd0af2 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -48,16 +48,16 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + array_arch<loongarch_isa> () + .set (CPU_LOONGARCH64, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) + .set (CPU_LA464, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ 
(ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +@@ -153,7 +153,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); ++ .set (ISA_BASE_LA64, STR_ISA_BASE_LA64); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +@@ -189,15 +189,15 @@ array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES> + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64))) + .set (ABI_BASE_LP64F, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU32))) + .set (ABI_BASE_LP64S, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, +- loongarch_isa ().base_ (ISA_BASE_LA64V100))); ++ loongarch_isa ().base_ (ISA_BASE_LA64))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 9e5eee0e2..a133ea265 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,9 +55,9 @@ along with GCC; see the file COPYING3. If not see + + /* enum isa_base */ + +-/* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-#define N_ISA_BASE_TYPES 1 ++/* LoongArch64 */ ++#define ISA_BASE_LA64 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 935d09f45..cf4c7bc93 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -567,17 +567,17 @@ isa_default_abi (const struct loongarch_isa *isa) + switch (isa->fpu) + { + case ISA_EXT_FPU64: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64D; + break; + + case ISA_EXT_FPU32: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64F; + break; + + case ISA_EXT_NONE: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64S; + break; + +@@ -596,8 +596,8 @@ isa_base_compat_p (const struct loongarch_isa *set1, + { + switch (set2->base) + { +- case ISA_BASE_LA64V100: +- return (set1->base >= ISA_BASE_LA64V100); ++ case ISA_BASE_LA64: ++ return (set1->base >= ISA_BASE_LA64); + + default: + gcc_unreachable (); +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 204338553..463812136 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -79,7 +79,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 
a8821acb0..2251df38b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA464 "la464" + #define STR_CPU_LA664 "la664" +
View file
_service:tar_scm:0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
Deleted
@@ -1,91 +0,0 @@
-From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:51 +0100
-Subject: [PATCH] aarch64: Rename locals_offset to bytes_above_locals
-MIME-Version: 1.0
-Content-Type: text/plain; charset=utf8
-Content-Transfer-Encoding: 8bit
-
-locals_offset was described as:
-
- /* Offset from the base of the frame (incomming SP) to the
- top of the locals area. This value is always a multiple of
- STACK_BOUNDARY. */
-
-This is implicitly an “upside down” view of the frame: the incoming
-SP is at offset 0, and anything N bytes below the incoming SP is at
-offset N (rather than -N).
-
-However, reg_offset instead uses a “right way up” view; that is,
-it views offsets in address terms. Something above X is at a
-positive offset from X and something below X is at a negative
-offset from X.
-
-Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
-target-independent code views offsets in address terms too:
-locals are allocated at negative offsets to virtual_stack_vars.
-
-It seems confusing to have *_offset fields of the same structure
-using different polarities like this. This patch tries to avoid
-that by renaming locals_offset to bytes_above_locals.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
- (aarch64_frame::bytes_above_locals): ...this.
- * config/aarch64/aarch64.cc (aarch64_layout_frame)
- (aarch64_initial_elimination_offset): Update accordingly.
----
- gcc/config/aarch64/aarch64.cc | 6 +++---
- gcc/config/aarch64/aarch64.h | 6 +++---
- 2 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index d71a042d6112..d4ec352ba98a 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void)
- STACK_BOUNDARY / BITS_PER_UNIT));
- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
-
-- frame.locals_offset = frame.saved_varargs_size;
-+ frame.bytes_above_locals = frame.saved_varargs_size;
-
- frame.initial_adjust = 0;
- frame.final_adjust = 0;
-@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
- return frame.hard_fp_offset;
-
- if (from == FRAME_POINTER_REGNUM)
-- return frame.hard_fp_offset - frame.locals_offset;
-+ return frame.hard_fp_offset - frame.bytes_above_locals;
- }
-
- if (to == STACK_POINTER_REGNUM)
- {
- if (from == FRAME_POINTER_REGNUM)
-- return frame.frame_size - frame.locals_offset;
-+ return frame.frame_size - frame.bytes_above_locals;
- }
-
- return frame.frame_size;
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 94fca4b94716..bf46e6124aa9 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame
- always a multiple of STACK_BOUNDARY. */
- poly_int64 bytes_below_hard_fp;
-
-- /* Offset from the base of the frame (incomming SP) to the
-- top of the locals area. This value is always a multiple of
-- STACK_BOUNDARY. */
-- poly_int64 locals_offset;
-+ /* The number of bytes between the top of the locals area and the top
-+ of the frame (the incomming SP). This value is always a multiple of
-+ STACK_BOUNDARY. */
-+ poly_int64 bytes_above_locals;
-
- /* Offset from the base of the frame (incomming SP) to the
- hard_frame_pointer. This value is always a multiple of
---
-2.43.5
-
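A toy illustration (made-up sizes) of the renamed field's "upside down" polarity: bytes_above_locals is a distance measured downward from the incoming SP, so the elimination offsets subtract it from frame-top quantities:

```c
/* Mirrors aarch64_initial_elimination_offset after the rename:
   FRAME_POINTER_REGNUM -> STACK_POINTER_REGNUM uses
   frame_size - bytes_above_locals.  Sizes are invented.  */
#include <assert.h>

int main (void)
{
  long frame_size = 128;         /* whole static frame              */
  long saved_varargs_size = 16;  /* varargs spill above the locals  */
  long bytes_above_locals = saved_varargs_size;

  assert (frame_size - bytes_above_locals == 112);
  return 0;
}
```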
View file
_service:tar_scm:0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
Added
@@ -0,0 +1,528 @@ +From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 1 Jun 2022 16:46:36 +0100 +Subject: PATCH 006/157 BackportSME AArch64: Cleanup option processing + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1 + +Further cleanup option processing. Remove the duplication of global +variables for CPU and tune settings so that CPU option processing is +simplified even further. Move global variables that need save and +restore due to target option processing into aarch64.opt. This removes +the need for explicit saving/restoring and unnecessary reparsing of +options. + +gcc/ + * config/aarch64/aarch64.opt (explicit_tune_core): Rename to + selected_tune. + (explicit_arch): Rename to selected_arch. + (x_aarch64_override_tune_string): Remove. + (aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored. + (aarch64_override_tune_string): Add Save so it gets saved/restored. + * config/aarch64/aarch64.h (aarch64_architecture_version): Remove. + * config/aarch64/aarch64.cc (aarch64_architecture_version): Remove. + (processor): Remove archtecture_version field. + (selected_arch): Remove global. + (selected_cpu): Remove global. + (selected_tune): Remove global. + (aarch64_ra_sign_key): Move global to aarch64.opt so it is saved. + (aarch64_override_options_internal): Use aarch64_get_tune_cpu. + (aarch64_override_options): Further simplify code to only set + selected_arch and selected_tune globals. + (aarch64_option_save): Remove now that target options are saved. + (aarch64_option_restore): Remove redundant target option restores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use + AARCH64_ISA_V9. + * config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from... + * config/aarch64/aarch64-protos.h (aarch64_key_type): Remove. + (aarch64_ra_sign_key): Remove. +--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-opts.h | 6 + + gcc/config/aarch64/aarch64-protos.h | 8 -- + gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------ + gcc/config/aarch64/aarch64.h | 3 - + gcc/config/aarch64/aarch64.opt | 12 +- + 6 files changed, 76 insertions(+), 138 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index a4c407724..90d45e45d 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ? 1 : 4); +diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h +index 93572fe83..421648a15 100644 +--- a/gcc/config/aarch64/aarch64-opts.h ++++ b/gcc/config/aarch64/aarch64-opts.h +@@ -98,4 +98,10 @@ enum stack_protector_guard { + SSP_GLOBAL /* global canary */ + }; + ++/* The key type that -msign-return-address should use. 
*/ ++enum aarch64_key_type { ++ AARCH64_KEY_A, ++ AARCH64_KEY_B ++}; ++ + #endif +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 475d174dd..e60ce3c36 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -672,14 +672,6 @@ enum simd_immediate_check { + AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC + }; + +-/* The key type that -msign-return-address should use. */ +-enum aarch64_key_type { +- AARCH64_KEY_A, +- AARCH64_KEY_B +-}; +- +-extern enum aarch64_key_type aarch64_ra_sign_key; +- + extern struct tune_params aarch64_tune_params; + + /* The available SVE predicate patterns, known in the ACLE as "svpattern". */ +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ba888beb0..254ecfaa2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); + static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + +-/* Major revision number of the ARM Architecture implemented by the target. */ +-unsigned aarch64_architecture_version; +- + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; + +@@ -2931,7 +2928,6 @@ struct processor + enum aarch64_processor ident; + enum aarch64_processor sched_core; + enum aarch64_arch arch; +- unsigned architecture_version; + const uint64_t flags; + const struct tune_params *const tune; + }; +@@ -2940,9 +2936,9 @@ struct processor + static const struct processor all_architectures = + { + #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ +- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL}, ++ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, + #include "aarch64-arches.def" +- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} ++ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + + /* Processor cores implementing AArch64. */ +@@ -2950,23 +2946,13 @@ static const struct processor all_cores = + { + #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ +- all_architecturesAARCH64_ARCH_##ARCH.architecture_version, \ + FLAGS, &COSTS##_tunings}, + #include "aarch64-cores.def" +- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8, ++ {"generic", generic, cortexa53, AARCH64_ARCH_8A, + AARCH64_FL_FOR_ARCH8, &generic_tunings}, +- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} ++ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + +- +-/* Target specification. These are populated by the -march, -mtune, -mcpu +- handling code or by target attributes. */ +-static const struct processor *selected_arch; +-static const struct processor *selected_cpu; +-static const struct processor *selected_tune; +- +-enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; +- + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + +@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void) + /* Use the specified limit for the number of cases before using jump + tables at higher optimization levels. 
 */
+ if (optimize > 2
+- && selected_cpu->tune->max_case_values != 0)
+- return selected_cpu->tune->max_case_values;
++ && aarch64_tune_params.max_case_values != 0)
++ return aarch64_tune_params.max_case_values;
+ else
+ return optimize_size ? 8 : 11;
+ }
+@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
+ return;
+ }
+
++/* Return the CPU corresponding to the enum CPU. */
++
++static const struct processor *
++aarch64_get_tune_cpu (enum aarch64_processor cpu)
++{
++ gcc_assert (cpu != aarch64_none);
++
++ return &all_cores[cpu];
++}
++
++/* Return the architecture corresponding to the enum ARCH. */
++
++static const struct processor *
++aarch64_get_arch (enum aarch64_arch arch)
++{
++ gcc_assert (arch != aarch64_no_arch);
++
++ return &all_architectures[arch];
++}
++
+ /* Parse STRING looking for options in the format:
+ string :: option:string
+ option :: name=substring
+@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
+ void
+ aarch64_override_options_internal (struct gcc_options *opts)
+ {
+- aarch64_tune_flags = selected_tune->flags;
+- aarch64_tune = selected_tune->sched_core;
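The reason the commit message gives for moving these globals into saved target options is per-function retargeting; a hypothetical sketch of the kind of attribute use that forces tune state to be saved and restored across functions (assuming the usual aarch64 target-attribute support):

```c
/* Hypothetical per-function target options on aarch64; switching tuning
   mid-TU is what requires selected_tune & co. to be part of the saved
   option state rather than plain globals.  */
__attribute__ ((target ("tune=cortex-a57")))
int hot_loop (int x) { return x * x; }

int generic_code (int x) { return x * x; }
```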
_service:tar_scm:0105-LoongArch-Use-enums-for-constants.patch
Added
@@ -0,0 +1,181 @@ +From 907b35525c8abcdfe22152ebce6640dbe3905cce Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:09 +0800 +Subject: PATCH 105/188 LoongArch: Use enums for constants + +Target features constants from loongarch-def.h are currently defined as macros. +Switch to enums for better look in the debugger. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Define constants with + enums instead of Macros. +--- + gcc/config/loongarch/loongarch-def.h | 115 ++++++++++++++++----------- + 1 file changed, 67 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index a133ea265..28da3ae5f 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -23,12 +23,10 @@ along with GCC; see the file COPYING3. If not see + - ISA extensions (isa_ext), + - base ABI types (abi_base), + - ABI extension types (abi_ext). +- +- - code models (cmodel) +- - other command-line switches (switch) ++ - code models (cmodel) + + These values are primarily used for implementing option handling +- logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ logic in "loongarch.opt", "loongarch-driver.cc" and "loongarch-opt.cc". + + As for the result of this option handling process, the following + scheme is adopted to represent the final configuration: +@@ -53,30 +51,40 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-/* enum isa_base */ + +-/* LoongArch64 */ +-#define ISA_BASE_LA64 0 +-#define N_ISA_BASE_TYPES 1 ++/* ISA base */ ++enum { ++ ISA_BASE_LA64 = 0, /* LoongArch64 */ ++ N_ISA_BASE_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-/* enum isa_ext_* */ +-#define ISA_EXT_NONE 0 +-#define ISA_EXT_FPU32 1 +-#define ISA_EXT_FPU64 2 +-#define N_ISA_EXT_FPU_TYPES 3 +-#define ISA_EXT_SIMD_LSX 3 +-#define ISA_EXT_SIMD_LASX 4 +-#define N_ISA_EXT_TYPES 5 ++ ++/* ISA extensions */ ++enum { ++ ISA_EXT_NONE = 0, ++ ISA_EXT_FPU32 = 1, ++ ISA_EXT_FPU64 = 2, ++ N_ISA_EXT_FPU_TYPES = 3, ++ ISA_EXT_SIMD_LSX = 3, ++ ISA_EXT_SIMD_LASX = 4, ++ N_ISA_EXT_TYPES = 5 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_EXT_TYPES> + loongarch_isa_ext_strings; + +-/* enum abi_base */ +-#define ABI_BASE_LP64D 0 +-#define ABI_BASE_LP64F 1 +-#define ABI_BASE_LP64S 2 +-#define N_ABI_BASE_TYPES 3 ++ ++/* Base ABI */ ++enum { ++ ABI_BASE_LP64D = 0, ++ ABI_BASE_LP64F = 1, ++ ABI_BASE_LP64S = 2, ++ N_ABI_BASE_TYPES = 3 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + loongarch_abi_base_strings; + +@@ -90,28 +98,38 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + (abi_base == ABI_BASE_LP64S) + + +-/* enum abi_ext */ +-#define ABI_EXT_BASE 0 +-#define N_ABI_EXT_TYPES 1 ++/* ABI Extension */ ++enum { ++ ABI_EXT_BASE = 0, ++ N_ABI_EXT_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_EXT_TYPES> + loongarch_abi_ext_strings; + +-/* enum cmodel */ +-#define CMODEL_NORMAL 0 +-#define CMODEL_TINY 1 +-#define CMODEL_TINY_STATIC 2 +-#define CMODEL_MEDIUM 3 +-#define CMODEL_LARGE 4 +-#define CMODEL_EXTREME 5 +-#define N_CMODEL_TYPES 6 ++ ++/* Code Model */ ++enum { ++ CMODEL_NORMAL = 0, ++ CMODEL_TINY = 1, ++ CMODEL_TINY_STATIC = 2, ++ CMODEL_MEDIUM = 3, ++ CMODEL_LARGE = 4, ++ CMODEL_EXTREME = 5, ++ N_CMODEL_TYPES = 6 ++}; ++ + extern loongarch_def_array<const char *, N_CMODEL_TYPES> + 
loongarch_cmodel_strings; + +-/* enum explicit_relocs */ +-#define EXPLICIT_RELOCS_AUTO 0 +-#define EXPLICIT_RELOCS_NONE 1 +-#define EXPLICIT_RELOCS_ALWAYS 2 +-#define N_EXPLICIT_RELOCS_TYPES 3 ++ ++/* Explicit Reloc Type */ ++enum { ++ EXPLICIT_RELOCS_AUTO = 0, ++ EXPLICIT_RELOCS_NONE = 1, ++ EXPLICIT_RELOCS_ALWAYS = 2, ++ N_EXPLICIT_RELOCS_TYPES = 3 ++}; + + /* The common default value for variables whose assignments + are triggered by command-line options. */ +@@ -159,17 +177,18 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-/* CPU properties. */ +-/* index */ +-#define CPU_NATIVE 0 +-#define CPU_ABI_DEFAULT 1 +-#define CPU_LOONGARCH64 2 +-#define CPU_LA464 3 +-#define CPU_LA664 4 +-#define N_ARCH_TYPES 5 +-#define N_TUNE_TYPES 5 +- +-/* parallel tables. */ ++/* CPU model */ ++enum { ++ CPU_NATIVE = 0, ++ CPU_ABI_DEFAULT = 1, ++ CPU_LOONGARCH64 = 2, ++ CPU_LA464 = 3, ++ CPU_LA664 = 4, ++ N_ARCH_TYPES = 5, ++ N_TUNE_TYPES = 5 ++}; ++ ++/* CPU model properties */ + extern loongarch_def_array<const char *, N_ARCH_TYPES> + loongarch_cpu_strings; + extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES> +-- +2.43.0 +
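For readers unfamiliar with the motivation, here is a minimal standalone contrast between the two styles the patch above switches between; only the ABI_BASE_* names mirror loongarch-def.h, the program itself is hypothetical. With a macro the debugger only ever sees a raw number, while an enumerator keeps its symbolic name without losing constant-expression uses:

#include <cstdio>

/* Before: a bare macro.  */
#define OLD_ABI_BASE_LP64S 2

/* After: an anonymous enum.  Debuggers can print "ABI_BASE_LP64S",
   and the constants remain usable in switch labels, array bounds, etc.  */
enum {
  ABI_BASE_LP64D = 0,
  ABI_BASE_LP64F = 1,
  ABI_BASE_LP64S = 2,
  N_ABI_BASE_TYPES = 3
};

int
main ()
{
  int abi_base = ABI_BASE_LP64S;
  printf ("abi_base = %d of %d types\n", abi_base, N_ABI_BASE_TYPES);
  return abi_base == OLD_ABI_BASE_LP64S ? 0 : 1;  /* values are identical */
}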
_service:tar_scm:0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
Deleted
@@ -1,148 +0,0 @@ -From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Rename hard_fp_offset to bytes_above_hard_fp -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Similarly to the previous locals_offset patch, hard_fp_offset -was described as: - - /* Offset from the base of the frame (incomming SP) to the - hard_frame_pointer. This value is always a multiple of - STACK_BOUNDARY. */ - poly_int64 hard_fp_offset; - -which again took an “upside-down” view: higher offsets meant lower -addresses. This patch renames the field to bytes_above_hard_fp instead. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename - to... - (aarch64_frame::bytes_above_hard_fp): ...this. - * config/aarch64/aarch64.cc (aarch64_layout_frame) - (aarch64_expand_prologue): Update accordingly. - (aarch64_initial_elimination_offset): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- - gcc/config/aarch64/aarch64.h | 6 +++--- - 2 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index d4ec352ba98a..3c4052740e7a 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void) - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.hard_fp_offset -+ frame.bytes_above_hard_fp - = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -- && known_eq (frame.hard_fp_offset, const_size)) -+ && known_eq (frame.bytes_above_hard_fp, const_size)) - { - /* Simple, small frame with no data below the saved registers. - -@@ -8401,8 +8401,8 @@ aarch64_layout_frame (void) - case that it hardly seems worth the effort though. 
*/ - && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca -- && frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset)) -+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset)) - { - /* Frame with small area below the saved registers: - -@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void) - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.hard_fp_offset -+ frame.initial_adjust = (frame.bytes_above_hard_fp - + frame.below_hard_fp_saved_regs_size); - frame.final_adjust = frame.bytes_below_saved_regs; - } -- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset) -+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset) - { - /* Frame with large area below the saved registers, or with SVE saves, - but with a small area above: -@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.callee_adjust = const_fp_offset; -+ frame.callee_adjust = const_above_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = frame.hard_fp_offset; -+ frame.initial_adjust = frame.bytes_above_hard_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void) - { - /* The offset of the frame chain record (if any) from the current SP. */ - poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -+ - frame.bytes_above_hard_fp); - gcc_assert (known_ge (chain_offset, 0)); - - if (callee_adjust == 0) -@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) - if (to == HARD_FRAME_POINTER_REGNUM) - { - if (from == ARG_POINTER_REGNUM) -- return frame.hard_fp_offset; -+ return frame.bytes_above_hard_fp; - - if (from == FRAME_POINTER_REGNUM) -- return frame.hard_fp_offset - frame.bytes_above_locals; -+ return frame.bytes_above_hard_fp - frame.bytes_above_locals; - } - - if (to == STACK_POINTER_REGNUM) -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index bf46e6124aa9..dd1f403f9393 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_locals; - -- /* Offset from the base of the frame (incomming SP) to the -- hard_frame_pointer. This value is always a multiple of -+ /* The number of bytes between the hard_frame_pointer and the top of -+ the frame (the incomming SP). This value is always a multiple of - STACK_BOUNDARY. */ -- poly_int64 hard_fp_offset; -+ poly_int64 bytes_above_hard_fp; - - /* The size of the frame. This value is the offset from base of the - frame (incomming SP) to the stack_pointer. This value is always --- -2.43.5 -
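The rename in the deleted patch above is purely directional: the quantity is unchanged, only its description flips from an offset measured downward from the incoming SP to a byte count read upward from the hard frame pointer. A tiny sketch with invented numbers; the real fields are poly_int64 members of struct aarch64_frame:

#include <cstdio>

int
main ()
{
  long frame_size = 96;           /* incoming SP down to the final SP  */
  long bytes_below_hard_fp = 32;  /* outgoing args plus below-FP saves */

  /* Old reading: "offset from the base of the frame (incoming SP) to
     the hard_frame_pointer"; a larger offset means a lower address.  */
  long hard_fp_offset = frame_size - bytes_below_hard_fp;

  /* New reading says the same thing right side up: the number of bytes
     between the hard frame pointer and the top of the frame.  */
  long bytes_above_hard_fp = hard_fp_offset;

  printf ("bytes_above_hard_fp = %ld\n", bytes_above_hard_fp);
  return 0;
}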
_service:tar_scm:0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
Added
@@ -0,0 +1,108 @@ +From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Mon, 26 Sep 2022 10:10:25 +0100 +Subject: PATCH 007/157 BackportSME aarch64: Add -march support for + Armv9.1-A, Armv9.2-A, Armv9.3-A + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3 + +This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject +through -march. These are already supported in binutils. + +Bootstrapped and tested on aarch64-none-linux-gnu. + +gcc/ChangeLog: + + * config/aarch64/aarch64-arches.def (armv9.1-a): Define. + (armv9.2-a): Likewise. + (armv9.3-a): Likewise. + * config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise. + (AARCH64_FL_V9_2): Likewise. + (AARCH64_FL_V9_3): Likewise. + (AARCH64_FL_FOR_ARCH9_1): Likewise. + (AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3): Likewise. + (AARCH64_ISA_V9_1): Likewise. + (AARCH64_ISA_V9_2): Likewise. + (AARCH64_ISA_V9_3): Likewise. + * doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a, + armv9.3-a values to -march. +--- + gcc/config/aarch64/aarch64-arches.def | 3 +++ + gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++ + gcc/doc/invoke.texi | 3 +++ + 3 files changed, 24 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 3c2b16588..6150448dc 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8 + AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) + AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) + AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7d73689e4..42aae37ef 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -239,6 +239,15 @@ + /* Armv8.8-a architecture extensions. */ + #define AARCH64_FL_V8_8 (1ULL << 45) + ++/* Armv9.1-A. */ ++#define AARCH64_FL_V9_1 (1ULL << 46) ++ ++/* Armv9.2-A. */ ++#define AARCH64_FL_V9_2 (1ULL << 47) ++ ++/* Armv9.3-A. */ ++#define AARCH64_FL_V9_3 (1ULL << 48) ++ + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +@@ -274,6 +283,12 @@ + #define AARCH64_FL_FOR_ARCH9 \ + (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ + | AARCH64_FL_F16) ++#define AARCH64_FL_FOR_ARCH9_1 \ ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++#define AARCH64_FL_FOR_ARCH9_2 \ ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++#define AARCH64_FL_FOR_ARCH9_3 \ ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) + + /* Macros to test ISA flags. 
*/ + +@@ -314,6 +329,9 @@ + #define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) + #define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 17d9e4126..53709b246 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19176,6 +19176,9 @@ and the features that they enable by default: + @item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64} + @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} + @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} ++@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm} ++@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64} ++@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops} + @item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r} + @end multitable + +-- +2.33.0 +
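The AARCH64_FL_FOR_ARCH* scheme above is cumulative: each architecture revision's flag set is the previous revision's set plus that revision's marker bit, and the AARCH64_ISA_* macros simply mask the active flags. A reduced standalone sketch; the bit positions are invented, only the pattern is taken from the patch:

#include <cstdint>
#include <cstdio>

#define FL_SVE2  (1ULL << 0)
#define FL_V9    (1ULL << 1)
#define FL_V9_1  (1ULL << 2)

#define FL_FOR_ARCH9    (FL_SVE2 | FL_V9)
/* Each revision = the previous revision's flags + its own marker bit.  */
#define FL_FOR_ARCH9_1  (FL_FOR_ARCH9 | FL_V9_1)

static uint64_t isa_flags = FL_FOR_ARCH9_1;  /* as if -march=armv9.1-a */

/* Mirrors the shape of the AARCH64_ISA_V9_1 test macro added above.  */
#define ISA_V9_1 (isa_flags & FL_V9_1)

int
main ()
{
  printf ("v9.1 enabled: %s\n", ISA_V9_1 ? "yes" : "no");
  return 0;
}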
_service:tar_scm:0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch
Added
@@ -0,0 +1,124 @@ +From dc572aebb3a2c9062014ec50764bbc702dbb8a20 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:10 +0800 +Subject: PATCH 106/188 LoongArch: Simplify -mexplicit-reloc definitions + +Since we do not need printing or manual parsing of this option, +(whether in the driver or for target attributes to be supported later) +it can be handled in the .opt file framework. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Remove explicit-reloc + argument string definitions. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-explicit-relocs + as aliases to -mexplicit-relocs={always,none} + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.cc: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 6 ------ + gcc/config/loongarch/genopts/loongarch.opt.in | 8 ++++---- + gcc/config/loongarch/loongarch-str.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 12 ------------ + gcc/config/loongarch/loongarch.opt | 2 +- + 5 files changed, 5 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index ce70b8b9c..99fd4e7cd 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -64,9 +64,3 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme +- +-# -mexplicit-relocs +-OPTSTR_EXPLICIT_RELOCS explicit-relocs +-STR_EXPLICIT_RELOCS_AUTO auto +-STR_EXPLICIT_RELOCS_NONE none +-STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 851d8d1f3..f2055b55e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,20 +181,20 @@ Name(explicit_relocs) Type(int) + The code model option names for -mexplicit-relocs: + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) + + mexplicit-relocs= + Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 2251df38b..cacae38c0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -63,11 +63,6 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" +-#define STR_EXPLICIT_RELOCS_AUTO "auto" +-#define STR_EXPLICIT_RELOCS_NONE "none" +-#define STR_EXPLICIT_RELOCS_ALWAYS "always" +- + #define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b0bb67d60..8cd703caa 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7518,18 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs != M_OPT_UNSET +- && la_opt_explicit_relocs_backward != M_OPT_UNSET) +- error ("do not use %qs (with %qs) and %qs (without %qs) together", +- "-mexplicit-relocs=", "=", +- la_opt_explicit_relocs_backward ? "-mexplicit-relocs" +- : "-mno-explicit-relocs", "="); +- +- if (la_opt_explicit_relocs_backward != M_OPT_UNSET) +- la_opt_explicit_relocs = (la_opt_explicit_relocs_backward +- ? EXPLICIT_RELOCS_ALWAYS +- : EXPLICIT_RELOCS_NONE); +- + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS + ? (loongarch_mrelax +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index df7314973..d6e337ac2 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -202,7 +202,7 @@ Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) I + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-- +2.43.0 +
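The one-line Alias() change above moves the backward-compatibility mapping out of loongarch.cc and into the option framework: the positive boolean form is rewritten to the first alias value and the negative form to the second before any target code runs. A hypothetical standalone re-implementation of just that mapping:

#include <cstring>
#include <cstdio>

/* Stand-in for "Alias(mexplicit-relocs=, always, none)".  */
static const char *
canonicalize (const char *arg)
{
  if (!strcmp (arg, "-mexplicit-relocs"))
    return "-mexplicit-relocs=always";
  if (!strcmp (arg, "-mno-explicit-relocs"))
    return "-mexplicit-relocs=none";
  return arg;  /* already canonical */
}

int
main ()
{
  /* Prints -mexplicit-relocs=none; the manual reconciliation deleted
     from loongarch_option_override_internal is therefore dead code.  */
  printf ("%s\n", canonicalize ("-mno-explicit-relocs"));
  return 0;
}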
_service:tar_scm:0106-aarch64-Tweak-frame-size-comment.patch
Deleted
@@ -1,35 +0,0 @@ -From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Tweak frame_size comment -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -This patch fixes another case in which a value was described with -an “upside-down” view. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. ---- - gcc/config/aarch64/aarch64.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index dd1f403f9393..700524ae22bf 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_hard_fp; - -- /* The size of the frame. This value is the offset from base of the -- frame (incomming SP) to the stack_pointer. This value is always -+ /* The size of the frame, i.e. the number of bytes between the bottom -+ of the outgoing arguments and the incoming SP. This value is always - a multiple of STACK_BOUNDARY. */ - poly_int64 frame_size; - --- -2.43.5 -
_service:tar_scm:0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
Added
@@ -0,0 +1,112 @@ +From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Mon, 19 Feb 2024 19:27:29 +0800 +Subject: PATCH 008/157 BackportSME Revert "aarch64: Define + __ARM_FEATURE_RCPC" + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d + +Revert this commit to solve conflicts with later patches, +and will apply it later. +--- + gcc/config/aarch64/aarch64-c.cc | 1 - + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 4 +--- + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 ------------------- + 4 files changed, 6 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 90d45e45d..3d2fb5ec2 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); +- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..842d64932 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_ + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 42aae37ef..7c090c8f2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -262,8 +262,7 @@ + #define AARCH64_FL_FOR_ARCH8_2 \ + (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \ +- | AARCH64_FL_RCPC) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ + (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) +@@ -314,7 +313,6 @@ + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) +-#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index 307fa3d67..bfb044f5d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,26 +248,6 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + +-#pragma GCC target ("arch=armv8.2-a") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" 
+-#endif +- +-#pragma GCC target ("arch=armv8.2-a+rcpc") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- +-#pragma GCC target ("+norcpc") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" +-#endif +- +-#pragma GCC target ("arch=armv8.3-a") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- + int + foo (int a) + { +-- +2.33.0 +
_service:tar_scm:0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch
Added
@@ -0,0 +1,35 @@ +From f90e31b6dc8c99f6670dee9a120c5dd9fa9a18d9 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 10 Jan 2024 15:25:21 +0800 +Subject: PATCH 107/188 LoongArch: testsuite: Add loongarch support to + slp-21.c. + +The function of this test is to check that the compiler supports vectorization +using SLP and vec_{load/store/*}_lanes. However, vec_{load/store/*}_lanes are +not supported on LoongArch, such as the corresponding "st4/ld4" directives on +aarch64. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/slp-21.c: Add loongarch. +--- + gcc/testsuite/gcc.dg/vect/slp-21.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c +index 4b83adb98..3b7e92fe8 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-21.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-21.c +@@ -210,7 +210,7 @@ int main (void) + + Not all vect_perm targets support that, and it's a bit too specific to have + its own effective-target selector, so we just test targets directly. */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* } } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + +-- +2.43.0 +
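For context, slp-21.c stresses groups of interleaved stores; targets with st4-style vec_store_lanes instructions vectorize more of these groups, which is why the expected "vectorizing stmts using SLP" count in the dg-final lines differs per target. A standalone sketch of the access pattern involved, not the actual testcase source:

#include <cstdio>

/* Four interleaved stores per iteration: a stride-4 store group.  */
void
interleave (int *out, const int *a, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[4 * i + 0] = a[i] + 1;
      out[4 * i + 1] = a[i] + 2;
      out[4 * i + 2] = a[i] + 3;
      out[4 * i + 3] = a[i] + 4;
    }
}

int
main ()
{
  int a[2] = { 10, 20 }, out[8];
  interleave (out, a, 2);
  printf ("%d %d\n", out[0], out[7]);  /* 11 24 */
  return 0;
}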
_service:tar_scm:0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
Deleted
@@ -1,195 +0,0 @@ -From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: PATCH aarch64: Measure reg_offset from the bottom of the frame - -reg_offset was measured from the bottom of the saved register area. -This made perfect sense with the original layout, since the bottom -of the saved register area was also the hard frame pointer address. -It became slightly less obvious with SVE, since we save SVE -registers below the hard frame pointer, but it still made sense. - -However, if we want to allow different frame layouts, it's more -convenient and obvious to measure reg_offset from the bottom of -the frame. After previous patches, it's also a slight simplification -in its own right. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame): Add comment above - reg_offset. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets - from the bottom of the frame, rather than the bottom of the saved - register area. Measure reg_offset from the bottom of the frame - rather than the bottom of the saved register area. - (aarch64_save_callee_saves): Update accordingly. - (aarch64_restore_callee_saves): Likewise. - (aarch64_get_separate_components): Likewise. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 3 ++ - 2 files changed, 27 insertions(+), 29 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3c4052740e7a..97dd077844b4 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void) - static void - aarch64_layout_frame (void) - { -- poly_int64 offset = 0; - int regno, last_fp_reg = INVALID_REGNUM; - machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); -@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - -- frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ poly_int64 offset = crtl->outgoing_args_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.bytes_below_saved_regs = offset; - - /* Now assign stack slots for the registers. 
Start with the predicate - registers, since predicate LDR and STR have a relatively small -@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void) - offset += BYTES_PER_SVE_PRED; - } - -- if (maybe_ne (offset, 0)) -+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; -+ if (maybe_ne (saved_prs_size, 0)) - { - /* If we have any vector registers to save above the predicate registers, - the offset of the vector register save slots need to be a multiple -@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - else - { -- if (known_le (offset, vector_save_size)) -- offset = vector_save_size; -- else if (known_le (offset, vector_save_size * 2)) -- offset = vector_save_size * 2; -+ if (known_le (saved_prs_size, vector_save_size)) -+ offset = frame.bytes_below_saved_regs + vector_save_size; -+ else if (known_le (saved_prs_size, vector_save_size * 2)) -+ offset = frame.bytes_below_saved_regs + vector_save_size * 2; - else - gcc_unreachable (); - } -@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- bool saves_below_hard_fp_p = maybe_ne (offset, 0); -- frame.below_hard_fp_saved_regs_size = offset; -- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; -+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p -+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ frame.bytes_below_hard_fp = offset; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset; -+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; -+ poly_int64 varargs_and_saved_regs_size -+ = frame.saved_regs_size + frame.saved_varargs_size; - - poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size -@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void) - it as a stack probe for -fstack-clash-protection. */ - if (flag_stack_clash_protection - && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) -- && known_eq (offset, 0)) -+ && known_eq (offset, frame.bytes_below_saved_regs)) - continue; - - /* Get the offset relative to the register we'll use. 
*/ - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - rtx reg = gen_rtx_REG (mode, regno); - poly_int64 offset = frame.reg_offsetregno; - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9248,9 +9244,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - /* REGNO2 can be saved/restored in a pair with REGNO. */ - rtx reg2 = gen_rtx_REG (mode, regno2); - if (frame_pointer_needed) -- offset2 -= frame.below_hard_fp_saved_regs_size; -- else -- offset2 += frame.bytes_below_saved_regs; -+ offset2 -= frame.bytes_below_hard_fp; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p - && known_eq (frame.below_hard_fp_saved_regs_size, 0)) - { -- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; -+ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM -+ - frame.bytes_below_saved_regs); - if (known_ge (lr_offset, 0)) - min_probe_threshold -= lr_offset.to_constant (); - else -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 700524ae22bf..b61358370732 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune; - #ifdef HAVE_POLY_INT_H - struct GTY (()) aarch64_frame - { -+ /* The offset from the bottom of the static frame (the bottom of the -+ outgoing arguments) of each register save slot, or -2 if no save is -+ needed. */ - poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; - - /* The number of extra stack bytes taken up by register varargs. --- -2.43.5 -
_service:tar_scm:0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
Added
@@ -0,0 +1,39 @@ +From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Tue, 20 Feb 2024 10:13:06 +0800 +Subject: PATCH 009/157 BackportSME Revert "Ampere-1 and Ampere-1A core + definition in aarch64-cores.def" + +Revert it to solve conflicts with later patches, and will apply it +later. It's introduced by commit 3668a59ae22a and e9f0d974600e. +--- + gcc/config/aarch64/aarch64-cores.def | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 842d64932..0402bfb74 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH + AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1) +-AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ + AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) +@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG +- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + + AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-- +2.33.0 +
_service:tar_scm:0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch
Added
@@ -0,0 +1,228 @@ +From 9b19eb071fe3826aa61567b927fc95a37f6560f7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 8 Dec 2023 10:16:48 +0800 +Subject: PATCH 108/188 LoongArch: Optimized some of the symbolic expansion + instructions generated during bitwise operations. + +There are two mode iterators defined in the loongarch.md: + (define_mode_iterator GPR SI (DI "TARGET_64BIT")) + and + (define_mode_iterator X (SI "!TARGET_64BIT") (DI "TARGET_64BIT")) +Replace the mode in the bit arithmetic from GPR to X. + +Since the bitwise operation instruction does not distinguish between 64-bit, +32-bit, etc., it is necessary to perform symbolic expansion if the bitwise +operation is less than 64 bits. +The original definition would have generated a lot of redundant symbolic +extension instructions. This problem is optimized with reference to the +implementation of RISCV. + +Add this patch spec2017 500.perlbench performance improvement by 1.8% + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (one_cmpl<mode>2): Replace GPR with X. + (*nor<mode>3): Likewise. + (nor<mode>3): Likewise. + (*negsi2_extended): New template. + (*<optab>si3_internal): Likewise. + (*one_cmplsi2_internal): Likewise. + (*norsi3_internal): Likewise. + (*<optab>nsi_internal): Likewise. + (bytepick_w_<bytepick_imm>_extend): Modify this template according to the + modified bit operation to make the optimization work. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-bitwise.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../loongarch/sign-extend-bitwise.c | 21 +++++ + 2 files changed, 90 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23653a2b0..6ebf33cbe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -736,7 +736,7 @@ + + (define_insn "sub<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) + "" + "sub.<d>\t%0,%z1,%2" +@@ -1412,13 +1412,13 @@ + (set_attr "alu_type" "sub") + (set_attr "mode" "<MODE>")) + +-(define_insn "one_cmpl<mode>2" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r"))) +- "" +- "nor\t%0,%.,%1" +- (set_attr "alu_type" "not") +- (set_attr "mode" "<MODE>")) ++(define_insn "*negsi2_extended" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r")))) ++ "TARGET_64BIT" ++ "sub.w\t%0,%.,%1" ++ (set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")) + + (define_insn "neg<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1438,14 +1438,39 @@ + ;; + + (define_insn "<optab><mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r,r") +- (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K"))) ++ (set (match_operand:X 0 "register_operand" "=r,r") ++ (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") ++ (match_operand:X 2 "uns_arith_operand" "r,K"))) + "" + "<insn>%i2\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*<optab>si3_internal" ++ (set (match_operand:SI 0 "register_operand" "=r,r") ++ (any_bitwise:SI 
(match_operand:SI 1 "register_operand" "%r,r") ++ (match_operand:SI 2 "uns_arith_operand" " r,K"))) ++ "TARGET_64BIT" ++ "<insn>%i2\t%0,%1,%2" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ ++(define_insn "one_cmpl<mode>2" ++ (set (match_operand:X 0 "register_operand" "=r") ++ (not:X (match_operand:X 1 "register_operand" "r"))) ++ "" ++ "nor\t%0,%.,%1" ++ (set_attr "alu_type" "not") ++ (set_attr "mode" "<MODE>")) ++ ++(define_insn "*one_cmplsi2_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (not:SI (match_operand:SI 1 "register_operand" " r"))) ++ "TARGET_64BIT" ++ "nor\t%0,%.,%1" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ + (define_insn "and<mode>3_extended" + (set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") +@@ -1561,25 +1586,43 @@ + (set_attr "type" "logical") + (set_attr "mode" "HI")) + +-(define_insn "*nor<mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) +- (not:GPR (match_operand:GPR 2 "register_operand" "r")))) ++(define_insn "nor<mode>3" ++ (set (match_operand:X 0 "register_operand" "=r") ++ (and:X (not:X (match_operand:X 1 "register_operand" "%r")) ++ (not:X (match_operand:X 2 "register_operand" "r")))) + "" + "nor\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*norsi3_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "%r")) ++ (not:SI (match_operand:SI 2 "register_operand" "r")))) ++ "TARGET_64BIT" ++ "nor\t%0,%1,%2" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) ++ + (define_insn "<optab>n<mode>" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (neg_bitwise:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r"))) ++ (set (match_operand:X 0 "register_operand" "=r") ++ (neg_bitwise:X ++ (not:X (match_operand:X 1 "register_operand" "r")) ++ (match_operand:X 2 "register_operand" "r"))) + "" + "<insn>n\t%0,%2,%1" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + ++(define_insn "*<optab>nsi_internal" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (neg_bitwise:SI ++ (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r"))) ++ "TARGET_64BIT" ++ "<insn>n\t%0,%2,%1" ++ (set_attr "type" "logical") ++ (set_attr "mode" "SI")) +  + ;; + ;; .................... +@@ -3167,7 +3210,6 @@ + (label_ref (match_operand 1)) + (pc)))) + +- +  + ;; + ;; .................... +@@ -3967,10 +4009,13 @@ + (define_insn "bytepick_w_<bytepick_imm>_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int <bytepick_w_lshiftrt_amount>)) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount))))) ++ (subreg:SI ++ (ior:DI (subreg:DI (lshiftrt ++ (match_operand:SI 1 "register_operand" "r") ++ (const_int <bytepick_w_lshiftrt_amount>)) 0) ++ (subreg:DI (ashift ++ (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount)) 0)) 0))) + "TARGET_64BIT" + "bytepick.w\t%0,%1,%2,<bytepick_imm>" + (set_attr "mode" "SI")) +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c
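The redundancy removed above rests on one identity: bitwise operations commute with sign extension, so when 32-bit values are already held sign-extended in 64-bit registers (the convention the patch relies on), performing the operation at 64 bits yields the same register contents as the 32-bit operation followed by an explicit extension. A standalone check of that identity for NOR, with invented inputs:

#include <cstdint>
#include <cstdio>

int
main ()
{
  int32_t a = -5, b = 1234;
  int64_t wa = a, wb = b;  /* sign-extended "register" copies */

  /* nor.w result followed by an explicit sign extension...  */
  int64_t narrow = (int64_t) (int32_t) ~(a | b);
  /* ...versus a single 64-bit nor on the extended inputs.  */
  int64_t wide = ~(wa | wb);

  printf ("%s\n", narrow == wide ? "identical" : "differ");
  return narrow == wide ? 0 : 1;
}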
_service:tar_scm:0108-aarch64-Simplify-top-of-frame-allocation.patch
Deleted
@@ -1,55 +0,0 @@ -From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: PATCH aarch64: Simplify top of frame allocation - -After previous patches, it no longer really makes sense to allocate -the top of the frame in terms of varargs_and_saved_regs_size and -saved_regs_and_above. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify - the allocation of the top of the frame. ---- - gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- - 1 file changed, 8 insertions(+), 15 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 97dd077844b4..81935852d5b2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void) - - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size -- = frame.saved_regs_size + frame.saved_varargs_size; -- -- poly_int64 saved_regs_and_above -- = aligned_upper_bound (varargs_and_saved_regs_size -- + get_frame_size (), -- STACK_BOUNDARY / BITS_PER_UNIT); -- -- frame.bytes_above_hard_fp -- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ auto top_of_locals = offset; - -- /* Both these values are already aligned. */ -- gcc_assert (multiple_p (frame.bytes_below_saved_regs, -- STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; -+ offset += frame.saved_varargs_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.frame_size = offset; - -- frame.bytes_above_locals = frame.saved_varargs_size; -+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; - frame.final_adjust = 0; --- -2.43.5 -
_service:tar_scm:0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
Added
@@ -0,0 +1,157 @@ +From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:50 +0100 +Subject: PATCH 010/157 BackportSME aarch64: Rename AARCH64_ISA + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9 + +All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R +are for the A profile: they cause __ARM_ARCH_PROFILE to be set to +'A' and they are associated with architecture names like armv8.4-a. + +It's convenient for later patches if we make this explicit +by adding an "A" to the name. Also, rather than add an underscore +(as for V8_R) it's more convenient to add the profile directly +to the number, like we already do in the ARCH_IDENT field of the +aarch64-arches.def entries. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3) + (AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6) + (AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2) + (AARCH64_ISA_V9_3): Add "A" to the end of the name. + (AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R. + (TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update + accordingly. + * common/config/aarch64/aarch64-common.cc + (aarch64_get_extension_string_for_isa_flags): Likewise. + * config/aarch64/aarch64-c.cc + (aarch64_define_unconditional_macros): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-c.cc | 4 +-- + gcc/config/aarch64/aarch64.h | 28 ++++++++++----------- + 3 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 85ce8133b..3dc020f0c 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, + + Note that assemblers with Armv8-R AArch64 support should not have this + issue, so we don't need this fix when targeting Armv8-R. */ +- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R) ++ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) + isa_flag_bits |= AARCH64_ISA_CRC; + + /* Pass Two: +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 3d2fb5ec2..18c9b975b 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define ("__ARM_ARCH_8A"); + + builtin_define_with_int_value ("__ARM_ARCH_PROFILE", +- AARCH64_ISA_V8_R ? 'R' : 'A'); ++ AARCH64_ISA_V8R ? 'R' : 'A'); + builtin_define ("__ARM_FEATURE_CLZ"); + builtin_define ("__ARM_FEATURE_IDIV"); + builtin_define ("__ARM_FEATURE_UNALIGNED"); +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ? 
1 : 4); +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7c090c8f2..356a263b2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) + #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) + #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) + #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) + #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +-#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) ++#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) +-#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) +-#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) +-#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) +-#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) ++#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +@@ -383,16 +383,16 @@ + #define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) + + /* ARMv8.3-A features. 
*/ +-#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) ++#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + + /* Javascript conversion instruction from Armv8.3-a. */ +-#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) ++#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A) + + /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ + #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) + + /* Floating-point rounding instructions from Armv8.5-a. */ +-#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) ++#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT) + + /* TME instructions are enabled. */ + #define TARGET_TME (AARCH64_ISA_TME) +@@ -401,7 +401,7 @@ + #define TARGET_RNG (AARCH64_ISA_RNG) + + /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */ +-#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG) ++#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG) + + /* I8MM instructions are enabled through +i8mm. */ + #define TARGET_I8MM (AARCH64_ISA_I8MM) +-- +2.33.0 +
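The renamed AARCH64_ISA_V8R macro is what feeds the __ARM_ARCH_PROFILE predefine mentioned in the commit message: 'R' for Armv8-R targets, 'A' otherwise. A small portable probe; it compiles anywhere and only prints a profile when the compiler defines the macro:

#include <cstdio>

int
main ()
{
#ifdef __ARM_ARCH_PROFILE
  printf ("profile: %c\n", (char) __ARM_ARCH_PROFILE);  /* 'A' or 'R' */
#else
  printf ("__ARM_ARCH_PROFILE is not defined on this target\n");
#endif
  return 0;
}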
_service:tar_scm:0109-LoongArch-Implement-option-save-restore.patch
Added
@@ -0,0 +1,467 @@ +From 146c85fa8b32d88acacf8645096d004e0c6f2f9c Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Thu, 11 Jan 2024 09:07:10 +0800 +Subject: PATCH 109/188 LoongArch: Implement option save/restore + +LTO option streaming and target attributes both require per-function +target configuration, which is achieved via option save/restore. + +We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target +context in addition to other automatically maintained option states +(via the "Save" option property in the .opt files). + +Tested on loongarch64-linux-gnu without regression. + + PR target/113233 + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark options with + the "Save" property. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-opts.cc: Refresh -mcmodel= state + according to la_target. + * config/loongarch/loongarch.cc: Implement TARGET_OPTION_{SAVE, + RESTORE} for the la_target structure; Rename option conditions + to have the same "la_" prefix. + * config/loongarch/loongarch.h: Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 38 ++++----- + gcc/config/loongarch/loongarch-opts.cc | 7 ++ + gcc/config/loongarch/loongarch.cc | 80 +++++++++++++++---- + gcc/config/loongarch/loongarch.h | 2 +- + gcc/config/loongarch/loongarch.opt | 38 ++++----- + 5 files changed, 111 insertions(+), 54 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f2055b55e..4d6b1902d 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -50,7 +50,7 @@ EnumValue + Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) + + m@@OPTSTR_ISA_EXT_FPU@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. + + m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ +@@ -82,7 +82,7 @@ EnumValue + Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) + + m@@OPTSTR_ISA_EXT_SIMD@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. + + m@@STR_ISA_EXT_LSX@@ +@@ -114,11 +114,11 @@ EnumValue + Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. + + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -149,31 +149,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. 
+ + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. + + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. + + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -198,11 +198,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. +@@ -229,29 +229,29 @@ EnumValue + Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. 
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index cf4c7bc93..a2b069d83 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -785,8 +785,15 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + opts->x_la_opt_cpu_arch = target->cpu_arch; + opts->x_la_opt_cpu_tune = target->cpu_tune; + ++ /* status of -mcmodel */ ++ opts->x_la_opt_cmodel = target->cmodel; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; ++ ++ /* status of -msimd */ + opts->x_la_opt_simd = target->isa.simd; ++ ++ /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8cd703caa..533bae5b2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4079,10 +4079,10 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + + /* Use this simple hardware resource model that how many non vld/vst + vector instructions can be issued per cycle. */ +- unsigned int issue_info = loongarch_vect_issue_info; ++ unsigned int issue_info = la_vect_issue_info; + unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1; + unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst); +- uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf); ++ uf = MIN ((unsigned int) la_vect_unroll_limit, uf); + + return 1 << ceil_log2 (uf); + } +@@ -5540,7 +5540,7 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align) + return false;
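The option save/restore machinery above is what makes per-function target state possible: both LTO option streaming and target attributes need the la_target context to be snapshotted and switched around each function. A hedged C-level sketch of the kind of usage this enables; the attribute string is illustrative, not a spelling guaranteed by this patch:

/* Functions may carry their own target configuration once
   TARGET_OPTION_{SAVE,RESTORE} switch la_target around them.  */
__attribute__ ((target ("cmodel=normal")))   /* illustrative attribute */
long f (int x) { return x + 1; }

long g (int x) { return x - 1; }             /* keeps the command-line model */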
_service:tar_scm:0109-aarch64-Minor-initial-adjustment-tweak.patch
Deleted
@@ -1,38 +0,0 @@ -From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:54 +0100 -Subject: PATCH aarch64: Minor initial adjustment tweak - -This patch just changes a calculation of initial_adjust -to one that makes it slightly more obvious that the total -adjustment is frame.frame_size. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak - calculation of initial_adjust for frames in which all saves - are SVE saves. ---- - gcc/config/aarch64/aarch64.cc | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 81935852d5b2..4d9fcf3d1623 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void) - { - /* Frame in which all saves are SVE saves: - -- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size -+ sub sp, sp, frame_size - bytes_below_saved_regs - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.bytes_above_hard_fp -- + frame.below_hard_fp_saved_regs_size); -+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) --- -2.43.5 -
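The tweak in the deleted patch above rests on a simple identity: however the split is expressed, the two stack adjustments must add up to the whole frame. A small self-checking sketch with made-up sizes:

#include <assert.h>

int main (void)
{
  long frame_size = 208;                /* hypothetical frame */
  long bytes_below_saved_regs = 64;     /* outgoing-argument area */

  long initial_adjust = frame_size - bytes_below_saved_regs;  /* new form */
  long final_adjust = bytes_below_saved_regs;

  assert (initial_adjust + final_adjust == frame_size);  /* total is frame_size */
  return 0;
}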
_service:tar_scm:0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
Added
@@ -0,0 +1,220 @@ +From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:51 +0100 +Subject: PATCH 011/157 BackportSME aarch64: Rename AARCH64_FL + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301 + +Following on from the previous AARCH64_ISA patch, this one adds the +profile name directly to the end of architecture-level AARCH64_FL_* +macros. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2) + (AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6) + (AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1) + (AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name. + (AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R. + (AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly. + (AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise. + (AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise. + (AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise. + (AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise. + (AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A) + (AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise. + (AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise. + (AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise. +--- + gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------ + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 356a263b2..5a91dfdd2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -154,22 +154,22 @@ + /* ARMv8.1-A architecture extensions. */ + #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ + #define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +-#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ ++#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ + /* Armv8-R. */ +-#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */ ++#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ + /* ARMv8.2-A architecture extensions. */ +-#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ ++#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ + #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ + #define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ + /* ARMv8.3-A architecture extensions. */ +-#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */ ++#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ + #define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ + #define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ + /* New flags to split crypto into aes and sha2. */ + #define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ + #define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ + /* ARMv8.4-A architecture extensions. */ +-#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */ ++#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ + #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ + #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ + #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. 
*/ +@@ -179,7 +179,7 @@ + #define AARCH64_FL_PROFILE (1 << 21) + + /* ARMv8.5-A architecture extensions. */ +-#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */ ++#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ + #define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ + #define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging + Extensions. */ +@@ -204,7 +204,7 @@ + #define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ + + /* Armv8.6-A architecture extensions. */ +-#define AARCH64_FL_V8_6 (1ULL << 34) ++#define AARCH64_FL_V8_6A (1ULL << 34) + + /* 8-bit Integer Matrix Multiply (I8MM) extensions. */ + #define AARCH64_FL_I8MM (1ULL << 35) +@@ -225,28 +225,28 @@ + #define AARCH64_FL_PAUTH (1ULL << 40) + + /* Armv9.0-A. */ +-#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */ ++#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ + + /* 64-byte atomic load/store extensions. */ + #define AARCH64_FL_LS64 (1ULL << 42) + + /* Armv8.7-a architecture extensions. */ +-#define AARCH64_FL_V8_7 (1ULL << 43) ++#define AARCH64_FL_V8_7A (1ULL << 43) + + /* Hardware memory operation instructions. */ + #define AARCH64_FL_MOPS (1ULL << 44) + + /* Armv8.8-a architecture extensions. */ +-#define AARCH64_FL_V8_8 (1ULL << 45) ++#define AARCH64_FL_V8_8A (1ULL << 45) + + /* Armv9.1-A. */ +-#define AARCH64_FL_V9_1 (1ULL << 46) ++#define AARCH64_FL_V9_1A (1ULL << 46) + + /* Armv9.2-A. */ +-#define AARCH64_FL_V9_2 (1ULL << 47) ++#define AARCH64_FL_V9_2A (1ULL << 47) + + /* Armv9.3-A. */ +-#define AARCH64_FL_V9_3 (1ULL << 48) ++#define AARCH64_FL_V9_3A (1ULL << 48) + + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +@@ -258,36 +258,36 @@ + #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) + #define AARCH64_FL_FOR_ARCH8_1 \ + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ +- | AARCH64_FL_RDMA | AARCH64_FL_V8_1) ++ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) + #define AARCH64_FL_FOR_ARCH8_2 \ +- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) ++ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ +- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ ++ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) + #define AARCH64_FL_FOR_ARCH8_5 \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) + #define AARCH64_FL_FOR_ARCH8_6 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ + | AARCH64_FL_I8MM | AARCH64_FL_BF16) + #define AARCH64_FL_FOR_ARCH8_7 \ +- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64) ++ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) + #define AARCH64_FL_FOR_ARCH8_8 \ +- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS) ++ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) + + #define AARCH64_FL_FOR_ARCH8_R \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R) ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) + #define AARCH64_FL_FOR_ARCH9 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | 
AARCH64_FL_V9A \ + | AARCH64_FL_F16) + #define AARCH64_FL_FOR_ARCH9_1 \ +- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) + #define AARCH64_FL_FOR_ARCH9_2 \ +- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) + #define AARCH64_FL_FOR_ARCH9_3 \ +- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) + + /* Macros to test ISA flags. */ + +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
_service:tar_scm:0110-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,234 @@ +From 54786cec1f52854a70369a3060ed22b1e070f000 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 11 Jan 2024 19:36:19 +0800 +Subject: PATCH 110/188 LoongArch: Redundant sign extension elimination + optimization. + +We found that the current combine optimization pass in gcc cannot handle +the following redundant sign extension situations: + +(insn 77 76 78 5 (set (reg:SI 143) + (plus:SI (subreg/s/u:SI (reg/v:DI 104 len ) 0) + (const_int 1 0x1))) {addsi3} + (expr_list:REG_DEAD (reg/v:DI 104 len ) + (nil))) +(insn 78 77 82 5 (set (reg/v:DI 104 len ) + (sign_extend:DI (reg:SI 143))) {extendsidi2} + (nil)) + +Because reg:SI 143 is not died or set in insn 78, no replacement merge will +be performed for the insn sequence. We adjusted the add template to eliminate +redundant sign extensions during the expand pass. +Adjusted based on upstream comments: +https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (add<mode>3): Removed. + (*addsi3): New. + (addsi3): Ditto. + (adddi3): Ditto. + (*addsi3_extended): Removed. + (addsi3_extended): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend.c: Moved to... + * gcc.target/loongarch/sign-extend-1.c: ...here. + * gcc.target/loongarch/sign-extend-2.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../{sign-extend.c => sign-extend-1.c} | 0 + .../gcc.target/loongarch/sign-extend-2.c | 59 ++++++++++++ + 3 files changed, 128 insertions(+), 24 deletions(-) + rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6ebf33cbe..4c7e28ace 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -657,42 +657,87 @@ + (set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")) + +-(define_insn_and_split "add<mode>3" +- (set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r") +- (match_operand:GPR 2 "plus_<mode>_operand" +- "r,I,La,Lb,Lc,Ld,Le"))) ++(define_insn_and_split "*addsi3" ++ (set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" ++ "r,I,La,Lb,Le"))) + "" + "@ +- add.<d>\t%0,%1,%2 +- addi.<d>\t%0,%1,%2 ++ add.w\t%0,%1,%2 ++ addi.w\t%0,%1,%2 + # + * operands2 = GEN_INT (INTVAL (operands2) / 65536); \ + return \"addu16i.d\t%0,%1,%2\"; ++ #" ++ "CONST_INT_P (operands2) && !IMM12_INT (operands2) \ ++ && !ADDU16I_OPERAND (INTVAL (operands2))" ++ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4))) ++ { ++ loongarch_split_plus_constant (&operands2, SImode); ++ } ++ (set_attr "alu_type" "add") ++ (set_attr "mode" "SI") ++ (set_attr "insn_count" "1,1,2,1,2")) ++ ++(define_expand "addsi3" ++ (set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" "r,I,La,Le,Lb"))) ++ "TARGET_64BIT" ++{ ++ if (CONST_INT_P (operands2) && !IMM12_INT (operands2) ++ && ADDU16I_OPERAND (INTVAL (operands2))) ++ { ++ rtx t1 = gen_reg_rtx (DImode); ++ rtx t2 = gen_reg_rtx (DImode); ++ rtx t3 = gen_reg_rtx (DImode); ++ emit_insn (gen_extend_insn (t1, operands1, DImode, 
SImode, 0)); ++ t2 = operands2; ++ emit_insn (gen_adddi3 (t3, t1, t2)); ++ t3 = gen_lowpart (SImode, t3); ++ emit_move_insn (operands0, t3); ++ DONE; ++ } ++ else ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_addsi3_extended (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "adddi3" ++ (set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r") ++ (match_operand:DI 2 "plus_di_operand" ++ "r,I,La,Lb,Lc,Ld"))) ++ "TARGET_64BIT" ++ "@ ++ add.d\t%0,%1,%2 ++ addi.d\t%0,%1,%2 + # ++ * operands2 = GEN_INT (INTVAL (operands2) / 65536); \ ++ return \"addu16i.d\t%0,%1,%2\"; + # + #" +- "CONST_INT_P (operands2) && !IMM12_INT (operands2) \ ++ "&& CONST_INT_P (operands2) && !IMM12_INT (operands2) \ + && !ADDU16I_OPERAND (INTVAL (operands2))" +- (set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) +- (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4))) ++ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4))) + { +- loongarch_split_plus_constant (&operands2, <MODE>mode); ++ loongarch_split_plus_constant (&operands2, DImode); + } + (set_attr "alu_type" "add") +- (set_attr "mode" "<MODE>") +- (set_attr "insn_count" "1,1,2,1,2,2,2") +- (set (attr "enabled") +- (cond +- (match_test "<MODE>mode != DImode && which_alternative == 4") +- (const_string "no") +- (match_test "<MODE>mode != DImode && which_alternative == 5") +- (const_string "no") +- (match_test "<MODE>mode != SImode && which_alternative == 6") +- (const_string "no") +- (const_string "yes")))) +- +-(define_insn_and_split "*addsi3_extended" ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,1,2,1,2,2")) ++ ++(define_insn_and_split "addsi3_extended" + (set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r") +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +similarity index 100% +rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c +rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +new file mode 100644 +index 000000000..a45dde4f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-times "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" 1 } } */ ++ ++#include <stdint.h> ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++void ++bt_skip_func (const uint32_t len_limit, const uint32_t pos, ++ const uint8_t *const cur, uint32_t cur_match, ++ uint32_t *const son, const uint32_t cyclic_pos, ++ const uint32_t cyclic_size) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) ++ { ++ const uint32_t delta = pos - cur_match; ++ uint32_t *pair ++ = son ++ + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) ++ << 1); ++ const uint8_t *pb = cur - delta;
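The insn pair quoted in the commit message corresponds to a common C shape: 32-bit arithmetic on the low half of a 64-bit value whose result is sign-extended back. A sketch of code in that shape, which used to keep the redundant extension on loongarch64:

#include <stdint.h>

int64_t
step (int64_t len)
{
  len = (int32_t) len + 1;   /* plus:SI on the low word (addsi3) */
  return len;                /* sign_extend:DI of the sum (extendsidi2) */
}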
_service:tar_scm:0110-aarch64-Tweak-stack-clash-boundary-condition.patch
Deleted
@@ -1,125 +0,0 @@ -From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:54 +0100 -Subject: PATCH aarch64: Tweak stack clash boundary condition - -The AArch64 ABI says that, when stack clash protection is used, -there can be a maximum of 1KiB of unprobed space at sp on entry -to a function. Therefore, we need to probe when allocating ->= guard_size - 1KiB of data (>= rather than >). This is what -GCC does. - -If an allocation is exactly guard_size bytes, it is enough to allocate -those bytes and probe once at offset 1024. It isn't possible to use a -single probe at any other offset: higher would conmplicate later code, -by leaving more unprobed space than usual, while lower would risk -leaving an entire page unprobed. For simplicity, the code probes all -allocations at offset 1024. - -Some register saves also act as probes. If we need to allocate -more space below the last such register save probe, we need to -probe the allocation if it is > 1KiB. Again, this allocation is -then sometimes (but not always) probed at offset 1024. This sort of -allocation is currently only used for outgoing arguments, which are -rarely this big. - -However, the code also probed if this final outgoing-arguments -allocation was == 1KiB, rather than just > 1KiB. This isn't -necessary, since the register save then probes at offset 1024 -as required. Continuing to probe allocations of exactly 1KiB -would complicate later patches. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): - Don't probe final allocations that are exactly 1KiB in size (after - unprobed space above the final allocation has been deducted). - -gcc/testsuite/ - * gcc.target/aarch64/stack-check-prologue-17.c: New test. ---- - gcc/config/aarch64/aarch64.cc | 4 +- - .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ - 2 files changed, 58 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 4d9fcf3d1623..34c1d8614cd9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - HOST_WIDE_INT guard_size - = 1 << param_stack_clash_protection_guard_size; - HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; -+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; -+ gcc_assert (multiple_p (poly_size, byte_sp_alignment)); - HOST_WIDE_INT min_probe_threshold - = (final_adjustment_p -- ? guard_used_by_caller -+ ? guard_used_by_caller + byte_sp_alignment - : guard_size - guard_used_by_caller); - /* When doing the final adjustment for the outgoing arguments, take into - account any unprobed space there is above the current SP. There are -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -new file mode 100644 -index 000000000000..0d8a25d73a24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -@@ -0,0 +1,55 @@ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void f(int, ...); -+void g(); -+ -+/* -+** test1: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #1024 -+** cbnz w0, .* -+** bl g -+** ... 
-+*/ -+int test1(int z) { -+ __uint128_t x = 0; -+ int y0x400; -+ if (z) -+ { -+ f(0, 0, 0, 0, 0, 0, 0, &y, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); -+ } -+ g(); -+ return 1; -+} -+ -+/* -+** test2: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #1040 -+** str xzr, \sp\ -+** cbnz w0, .* -+** bl g -+** ... -+*/ -+int test2(int z) { -+ __uint128_t x = 0; -+ int y0x400; -+ if (z) -+ { -+ f(0, 0, 0, 0, 0, 0, 0, &y, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, -+ x); -+ } -+ g(); -+ return 1; -+} --- -2.43.5 -
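The boundary condition in the deleted patch above is easy to state numerically. With the 4 KiB guard used by the tests (--param stack-clash-protection-guard-size=12) and the 1 KiB AArch64 caller guard, a final outgoing-arguments allocation of exactly 1024 bytes no longer needs its own probe, while anything larger does. A sketch of the threshold arithmetic, with the constants assumed from the patch:

#include <stdbool.h>
#include <stdio.h>

#define GUARD_SIZE           4096   /* 1 << 12 */
#define GUARD_USED_BY_CALLER 1024   /* STACK_CLASH_CALLER_GUARD */
#define BYTE_SP_ALIGNMENT    16     /* STACK_BOUNDARY / BITS_PER_UNIT */

static bool
needs_probe (long alloc, bool final_adjustment_p)
{
  long threshold = final_adjustment_p
    ? GUARD_USED_BY_CALLER + BYTE_SP_ALIGNMENT   /* "> 1 KiB", not ">=" */
    : GUARD_SIZE - GUARD_USED_BY_CALLER;
  return alloc >= threshold;
}

int main (void)
{
  printf ("%d\n", needs_probe (1024, true));   /* 0: the register save probes */
  printf ("%d\n", needs_probe (1040, true));   /* 1: matches test2 above */
  return 0;
}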
_service:tar_scm:0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
Added
@@ -0,0 +1,398 @@ +From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:51 +0100 +Subject: PATCH 012/157 BackportSME aarch64: Rename AARCH64_FL_FOR_ARCH + macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2 + +This patch renames AARCH64_FL_FOR_ARCH* macros to follow the +same V<number><profile> names that we (now) use elsewhere. + +The names are only temporary -- a later patch will move the +information to the .def file instead. However, it helps with +the sequencing to do this first. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to... + (AARCH64_FL_FOR_V8A): ...this. + (AARCH64_FL_FOR_ARCH8_1): Rename to... + (AARCH64_FL_FOR_V8_1A): ...this. + (AARCH64_FL_FOR_ARCH8_2): Rename to... + (AARCH64_FL_FOR_V8_2A): ...this. + (AARCH64_FL_FOR_ARCH8_3): Rename to... + (AARCH64_FL_FOR_V8_3A): ...this. + (AARCH64_FL_FOR_ARCH8_4): Rename to... + (AARCH64_FL_FOR_V8_4A): ...this. + (AARCH64_FL_FOR_ARCH8_5): Rename to... + (AARCH64_FL_FOR_V8_5A): ...this. + (AARCH64_FL_FOR_ARCH8_6): Rename to... + (AARCH64_FL_FOR_V8_6A): ...this. + (AARCH64_FL_FOR_ARCH8_7): Rename to... + (AARCH64_FL_FOR_V8_7A): ...this. + (AARCH64_FL_FOR_ARCH8_8): Rename to... + (AARCH64_FL_FOR_V8_8A): ...this. + (AARCH64_FL_FOR_ARCH8_R): Rename to... + (AARCH64_FL_FOR_V8R): ...this. + (AARCH64_FL_FOR_ARCH9): Rename to... + (AARCH64_FL_FOR_V9A): ...this. + (AARCH64_FL_FOR_ARCH9_1): Rename to... + (AARCH64_FL_FOR_V9_1A): ...this. + (AARCH64_FL_FOR_ARCH9_2): Rename to... + (AARCH64_FL_FOR_V9_2A): ...this. + (AARCH64_FL_FOR_ARCH9_3): Rename to... + (AARCH64_FL_FOR_V9_3A): ...this. + * common/config/aarch64/aarch64-common.cc (all_cores): Update + accordingly. + * config/aarch64/aarch64-arches.def: Likewise. + * config/aarch64/aarch64-cores.def: Likewise. + * config/aarch64/aarch64.cc (all_cores): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-arches.def | 28 ++--- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/aarch64.h | 56 ++++----- + 5 files changed, 109 insertions(+), 109 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 3dc020f0c..0461201a5 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores = + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8}, ++ {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} + }; + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 6150448dc..c6bf7d82c 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + the NAME should be kept as the first argument and FLAGS as the last. 
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) ++AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 0402bfb74..c4038c641 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
_service:tar_scm:0111-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,56 @@ +From 7bb1a356ca9eefab48d64bd3deeaac081c1ae7ea Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 11 Jan 2024 19:36:33 +0800 +Subject: PATCH 111/188 LoongArch: Redundant sign extension elimination + optimization 2. + +Eliminate the redundant sign extension that exists after the conditional +move when the target register is SImode. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_conditional_move): + Adjust. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-2.c: Adjust. +--- + gcc/config/loongarch/loongarch.cc | 6 ++++++ + gcc/testsuite/gcc.target/loongarch/sign-extend-2.c | 5 +++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 533bae5b2..13481130b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5367,6 +5367,12 @@ loongarch_expand_conditional_move (rtx *operands) + rtx temp3 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2))); + temp3 = gen_lowpart (GET_MODE (operands0), temp3); ++ /* Nonzero in a subreg if it was made when accessing an object that ++ was promoted to a wider mode in accord with the PROMOTED_MODE ++ machine description macro. */ ++ SUBREG_PROMOTED_VAR_P (temp3) = 1; ++ /* Sets promoted mode for SUBREG_PROMOTED_VAR_P. */ ++ SUBREG_PROMOTED_SET (temp3, SRP_SIGNED); + loongarch_emit_move (operands0, temp3); + } + else +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +index a45dde4f7..e57a2727d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler-times "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" 1 } } */ ++/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand" } */ ++/* { dg-final { scan-rtl-dump "subreg/s" "expand" } } */ ++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\0-9\+,\\\$r\0-9\+,0" } } */ + + #include <stdint.h> + #define my_min(x, y) ((x) < (y) ? (x) : (y)) +-- +2.43.0 +
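The conditional-move case handled above has the same C-level shape as the earlier add case: an SImode select whose result flows into a wider context. Marking the lowpart subreg as sign-promoted lets the trailing extension fold away. A sketch of code in that shape:

#include <stdint.h>

int64_t
pick (int32_t x, int32_t y)
{
  int32_t m = x < y ? x : y;   /* expands to the masked IOR sequence */
  return m;                    /* the slli.w sign extension is now redundant */
}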
_service:tar_scm:0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
Deleted
@@ -1,406 +0,0 @@ -From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:55 +0100 -Subject: PATCH aarch64: Put LR save probe in first 16 bytes - --fstack-clash-protection uses the save of LR as a probe for the next -allocation. The next allocation could be: - -* another part of the static frame, e.g. when allocating SVE save slots - or outgoing arguments - -* an alloca in the same function - -* an allocation made by a callee function - -However, when -fomit-frame-pointer is used, the LR save slot is placed -above the other GPR save slots. It could therefore be up to 80 bytes -above the base of the GPR save area (which is also the hard fp address). - -aarch64_allocate_and_probe_stack_space took this into account when -deciding how much subsequent space could be allocated without needing -a probe. However, it interacted badly with: - - /* If doing a small final adjustment, we always probe at offset 0. - This is done to avoid issues when LR is not at position 0 or when - the final adjustment is smaller than the probing offset. */ - else if (final_adjustment_p && rounded_size == 0) - residual_probe_offset = 0; - -which forces any allocation that is smaller than the guard page size -to be probed at offset 0 rather than the usual offset 1024. It was -therefore possible to construct cases in which we had: - -* a probe using LR at SP + 80 bytes (or some other value >= 16) -* an allocation of the guard page size - 16 bytes -* a probe at SP + 0 - -which allocates guard page size + 64 consecutive unprobed bytes. - -This patch requires the LR probe to be in the first 16 bytes of the -save area when stack clash protection is active. Doing it -unconditionally would cause code-quality regressions. - -Putting LR before other registers prevents push/pop allocation -when shadow call stacks are enabled, since LR is restored -separately from the other callee-saved registers. - -The new comment doesn't say that the probe register is required -to be LR, since a later patch removes that restriction. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that - the LR save slot is in the first 16 bytes of the register save area. - Only form STP/LDP push/pop candidates if both registers are valid. - (aarch64_allocate_and_probe_stack_space): Remove workaround for - when LR was not in the first 16 bytes. - -gcc/testsuite/ - * gcc.target/aarch64/stack-check-prologue-18.c: New test. - * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. 
---- - gcc/config/aarch64/aarch64.cc | 72 ++++++------- - .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ - .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++ - .../aarch64/stack-check-prologue-20.c | 3 + - 4 files changed, 233 insertions(+), 42 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 34c1d8614cd9..16433fb70f4f 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void) - bool saves_below_hard_fp_p - = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); - frame.bytes_below_hard_fp = offset; -+ -+ auto allocate_gpr_slot = &(unsigned int regno) -+ { -+ frame.reg_offsetregno = offset; -+ if (frame.wb_push_candidate1 == INVALID_REGNUM) -+ frame.wb_push_candidate1 = regno; -+ else if (frame.wb_push_candidate2 == INVALID_REGNUM) -+ frame.wb_push_candidate2 = regno; -+ offset += UNITS_PER_WORD; -+ }; -+ - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -- frame.reg_offsetR29_REGNUM = offset; -- frame.wb_push_candidate1 = R29_REGNUM; -- frame.reg_offsetR30_REGNUM = offset + UNITS_PER_WORD; -- frame.wb_push_candidate2 = R30_REGNUM; -- offset += 2 * UNITS_PER_WORD; -+ allocate_gpr_slot (R29_REGNUM); -+ allocate_gpr_slot (R30_REGNUM); - } -+ else if (flag_stack_clash_protection -+ && known_eq (frame.reg_offsetR30_REGNUM, SLOT_REQUIRED)) -+ /* Put the LR save slot first, since it makes a good choice of probe -+ for stack clash purposes. The idea is that the link register usually -+ has to be saved before a call anyway, and so we lose little by -+ stopping it from being individually shrink-wrapped. */ -+ allocate_gpr_slot (R30_REGNUM); - - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) -- { -- frame.reg_offsetregno = offset; -- if (frame.wb_push_candidate1 == INVALID_REGNUM) -- frame.wb_push_candidate1 = regno; -- else if (frame.wb_push_candidate2 == INVALID_REGNUM) -- frame.wb_push_candidate2 = regno; -- offset += UNITS_PER_WORD; -- } -+ allocate_gpr_slot (regno); - - poly_int64 max_int_offset = offset; - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void) - max_push_offset to 0, because no registers are popped at this time, - so callee_adjust cannot be adjusted. */ - HOST_WIDE_INT max_push_offset = 0; -- if (frame.wb_pop_candidate2 != INVALID_REGNUM) -- max_push_offset = 512; -- else if (frame.wb_pop_candidate1 != INVALID_REGNUM) -- max_push_offset = 256; -+ if (frame.wb_pop_candidate1 != INVALID_REGNUM) -+ { -+ if (frame.wb_pop_candidate2 != INVALID_REGNUM) -+ max_push_offset = 512; -+ else -+ max_push_offset = 256; -+ } - - HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; -@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - = (final_adjustment_p - ? guard_used_by_caller + byte_sp_alignment - : guard_size - guard_used_by_caller); -- /* When doing the final adjustment for the outgoing arguments, take into -- account any unprobed space there is above the current SP. 
There are -- two cases: -- -- - When saving SVE registers below the hard frame pointer, we force -- the lowest save to take place in the prologue before doing the final -- adjustment (i.e. we don't allow the save to be shrink-wrapped). -- This acts as a probe at SP, so there is no unprobed space. -- -- - When there are no SVE register saves, we use the store of the link -- register as a probe. We can't assume that LR was saved at position 0 -- though, so treat any space below it as unprobed. */ -- if (final_adjustment_p -- && known_eq (frame.below_hard_fp_saved_regs_size, 0)) -- { -- poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM -- - frame.bytes_below_saved_regs); -- if (known_ge (lr_offset, 0)) -- min_probe_threshold -= lr_offset.to_constant (); -- else -- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); -- } -- - poly_int64 frame_size = frame.frame_size; - - /* We should always have a positive probe threshold. */ -@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p && rounded_size != 0) - min_probe_threshold = 0; - /* If doing a small final adjustment, we always probe at offset 0. -- This is done to avoid issues when LR is not at position 0 or when -- the final adjustment is smaller than the probing offset. */ -+ This is done to avoid issues when the final adjustment is smaller -+ than the probing offset. */ - else if (final_adjustment_p && rounded_size == 0) - residual_probe_offset = 0; - -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -new file mode 100644 -index 000000000000..82447d20fff5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -@@ -0,0 +1,100 @@ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void f(int, ...); -+void g(); -+ -+/* -+** test1: -+** ... -+** str x30, \sp\ -+** sub sp, sp, #4064
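The unsafe window described in the deleted patch's commit message is worth working through: a probe via the LR save at up to SP + 80, an allocation of guard_size - 16, then a probe at offset 0 leaves guard_size + 64 consecutive unprobed bytes, more than one whole guard page. The arithmetic, with a 4 KiB guard assumed:

#include <stdio.h>

int main (void)
{
  long guard = 4096;        /* guard page size */
  long lr_probe = 80;       /* LR save up to 80 bytes above the area base */
  long alloc = guard - 16;  /* allocation just under the guard size */

  long unprobed = lr_probe + alloc;   /* gap between consecutive probes */
  printf ("%ld bytes unprobed (= guard + %ld)\n", unprobed, unprobed - guard);
  return 0;
}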
_service:tar_scm:0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
Added
@@ -0,0 +1,315 @@ +From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:52 +0100 +Subject: PATCH 013/157 BackportSME aarch64: Add "V" to + aarch64-arches.def names + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1 + +This patch completes the renaming of architecture-level related +things by adding "V" to the name of the architecture in +aarch64-arches.def. Since the "V" is predictable, we can easily +drop it when we don't need it (as when matching /proc/cpuinfo). + +Having a valid C identifier is necessary for later patches. + +gcc/ + * config/aarch64/aarch64-arches.def: Add a leading "V" to the + ARCH_IDENT fields. + * config/aarch64/aarch64-cores.def: Update accordingly. + * common/config/aarch64/aarch64-common.cc (all_cores): Likewise. + * config/aarch64/aarch64.cc (all_cores): Likewise. + * config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the + leading "V". +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-arches.def | 28 ++--- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 3 +- + 5 files changed, 83 insertions(+), 82 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 0461201a5..6ca89d31f 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores = + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, ++ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} + }; + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index c6bf7d82c..e42202822 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + the NAME should be kept as the first argument and FLAGS as the last. 
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index c4038c641..f4c2f4ea4 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) 
++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
View file
_service:tar_scm:0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch
Added
@@ -0,0 +1,64 @@
+From 191675bdfd4cef0fbcf642f53da82a49bd23a3bf Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 12 Jan 2024 17:06:30 +0800
+Subject: [PATCH 112/188] LoongArch: Assign the '/u' attribute to the mem to
+ which the global offset table belongs.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_split_symbol):
+	Assign the '/u' attribute to the mem.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/loongarch/got-load.C: New test.
+---
+ gcc/config/loongarch/loongarch.cc             |  5 +++++
+ gcc/testsuite/g++.target/loongarch/got-load.C | 19 +++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/got-load.C
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 13481130b..7da00c132 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -3198,6 +3198,11 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+ 	    rtx mem = gen_rtx_MEM (Pmode, low);
+ 	    *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+ 				       UNSPEC_LOAD_FROM_GOT);
++
++	    /* Nonzero in a mem, if the memory is statically allocated and
++	       read-only.  A common example of the later is a shared library's
++	       global offset table.  */
++	    MEM_READONLY_P (mem) = 1;
+ 	  }
+ 
+ 	break;
+diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C
+new file mode 100644
+index 000000000..20924c739
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/got-load.C
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */
++/* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */
++
++#include <bits/stdc++.h>
++
++using namespace std;
++
++int lr[1000005][2];
++
++void
++test(void)
++{
++  int n;
++
++  cin >> n;
++  for (int i = 0; i < n; ++i)
++    cin >> lr[i][0] >> lr[i][1];
++}
+-- 
+2.43.0
+
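A note for readers outside GCC internals: RTL dumps print read-only memory references with a "/u" flag, which is what the scan-rtl-dump pattern above counts. The sketch below, with invented names and no GCC API beyond the concept, models why the flag helps: a load that can never alias a store is loop-invariant and may be hoisted.

    // Sketch only, not GCC internals.
    struct mem_ref { bool readonly; };

    bool may_hoist (const mem_ref &m, bool loop_contains_stores)
    {
      // A read-only mem ("/u") is safe to hoist even when the loop writes
      // memory, because no store in the program can modify it.
      return m.readonly || !loop_contains_stores;
    }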
View file
_service:tar_scm:0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
Deleted
@@ -1,123 +0,0 @@
-From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:55 +0100
-Subject: [PATCH] aarch64: Simplify probe of final frame allocation
-
-Previous patches ensured that the final frame allocation only needs
-a probe when the size is strictly greater than 1KiB.  It's therefore
-safe to use the normal 1024 probe offset in all cases.
-
-The main motivation for doing this is to simplify the code and
-remove the number of special cases.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
-	Always probe the residual allocation at offset 1024, asserting
-	that that is in range.
-
-gcc/testsuite/
-	* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
-	to be at offset 1024 rather than offset 0.
-	* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
-	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc                    | 12 ++++--------
- .../gcc.target/aarch64/stack-check-prologue-17.c |  2 +-
- .../gcc.target/aarch64/stack-check-prologue-18.c |  4 ++--
- .../gcc.target/aarch64/stack-check-prologue-19.c |  4 ++--
- 4 files changed, 9 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 16433fb70f4f..8abf3d7a1e2b 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-      are still safe.  */
-   if (residual)
-     {
---      HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
--+      gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
--+
-       /* If we're doing final adjustments, and we've done any full page
- 	 allocations then any residual needs to be probed.  */
-       if (final_adjustment_p && rounded_size != 0)
- 	min_probe_threshold = 0;
---      /* If doing a small final adjustment, we always probe at offset 0.
---	 This is done to avoid issues when the final adjustment is smaller
---	 than the probing offset.  */
---      else if (final_adjustment_p && rounded_size == 0)
---	residual_probe_offset = 0;
- 
-       aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
-       if (residual >= min_probe_threshold)
-@@ -9543,8 +9539,8 @@
- 		     HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
- 		     "\n", residual);
- 
---	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
---					   residual_probe_offset));
--+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
--+					   guard_used_by_caller));
- 	  emit_insn (gen_blockage ());
- 	}
-     }
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-index 0d8a25d73a24..f0ec1389771d 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -33,7 +33,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-index 82447d20fff5..6383bec5ebcd 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -9,7 +9,7 @@ void g();
- /*
- ** test1:
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #4064
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-@@ -50,7 +50,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-index 73ac3e4e4eb0..562039b5e9b8 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -9,7 +9,7 @@ void g();
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #4064
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
-@@ -50,7 +50,7 @@ int test1(int z) {
- **	...
- **	str	x30, \[sp\]
- **	sub	sp, sp, #1040
---**	str	xzr, \[sp\]
--+**	str	xzr, \[sp, #?1024\]
- **	cbnz	w0, .*
- **	bl	g
- **	...
---
-2.43.5
-
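A sketch of the invariant the deleted patch relied on, with invented names and plain int64_t standing in for GCC's types: once every residual allocation that needs probing is known to exceed the 1 KiB a caller may already have used, a probe at the fixed 1024-byte offset is always inside the new allocation.

    #include <cassert>
    #include <cstdint>

    int64_t residual_probe_offset (int64_t residual, int64_t min_probe_threshold)
    {
      const int64_t guard_used_by_caller = 1024;   // bytes callers may touch
      // Residuals that reach the probe are > 1 KiB, so offset 1024 is in range.
      if (residual >= min_probe_threshold)
        assert (residual >= guard_used_by_caller);
      return guard_used_by_caller;
    }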
View file
_service:tar_scm:0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
Added
@@ -0,0 +1,55 @@
+From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:52 +0100
+Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e
+
+The aarch64-option-extensions.def parsing in config.gcc had
+some code left over from when it tried to parse the whole
+macro definition.  Also, config.gcc now only looks at the
+first fields of the aarch64-arches.def entries.
+
+gcc/
+	* config.gcc: Remove dead aarch64-option-extensions.def code.
+	* config/aarch64/aarch64-arches.def: Update comment.
+---
+ gcc/config.gcc                        | 8 --------
+ gcc/config/aarch64/aarch64-arches.def | 2 +-
+ 2 files changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3be450471..da66603cd 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -4210,14 +4210,6 @@ case "${target}" in
+ 	  options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \
+ 			${srcdir}/config/aarch64/aarch64-option-extensions.def`"
+ 
+-	  # Match one element inside AARCH64_OPT_EXTENSION, we
+-	  # consume anything that's not a ,.
+-	  elem="[ 	]*\([^,]\+\)[ 	]*"
+-
+-	  # Repeat the pattern for the number of entries in the
+-	  # AARCH64_OPT_EXTENSION, currently 6 times.
+-	  sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
+-
+ 	  while [ x"$ext_val" != x ]
+ 	  do
+ 		ext_val=`echo $ext_val | sed -e 's/\+//'`
+diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
+index e42202822..ece96e22a 100644
+--- a/gcc/config/aarch64/aarch64-arches.def
++++ b/gcc/config/aarch64/aarch64-arches.def
+@@ -28,7 +28,7 @@
+    ARCH_REV is an integer specifying the architecture major revision.
+    FLAGS are the flags implied by the architecture.
+    Due to the assumptions about the positions of these fields in config.gcc,
+-   the NAME should be kept as the first argument and FLAGS as the last.  */
++   NAME should be kept as the first argument.  */
+ 
+ AARCH64_ARCH("armv8-a",       generic,       V8A,       8,  AARCH64_FL_FOR_V8A)
+ AARCH64_ARCH("armv8.1-a",     generic,       V8_1A,     8,  AARCH64_FL_FOR_V8_1A)
+-- 
+2.33.0
+
View file
_service:tar_scm:0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch
Added
@@ -0,0 +1,51 @@
+From 1576f83f8cae0ead9de533566ec5f21e7a01f842 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Sat, 13 Jan 2024 15:28:34 +0800
+Subject: [PATCH 113/188] LoongArch: testsuite: Fix fail in gen-vect-{2,25}.c
+ file.
+
+1. Added dg-do compile on LoongArch.
+   When binutils does not support vector instruction sets, an error occurs
+because the assembler does not recognize vector instructions.
+
+2. Added "-mlsx" option for vectorization on LoongArch.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/tree-ssa/gen-vect-2.c: Added detection of compilation
+	behavior and "-mlsx" option on LoongArch.
+	* gcc.dg/tree-ssa/gen-vect-25.c: Ditto.
+---
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c  | 2 ++
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+index 42171a2fb..395d6f7ee 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+ 
+ #include <stdlib.h>
+ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+index 60ec27054..cea7f246a 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+ 
+ #include <stdlib.h>
+ 
+-- 
+2.43.0
+
View file
_service:tar_scm:0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
Deleted
@@ -1,277 +0,0 @@ -From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:56 +0100 -Subject: PATCH aarch64: Explicitly record probe registers in frame info - -The stack frame is currently divided into three areas: - -A: the area above the hard frame pointer -B: the SVE saves below the hard frame pointer -C: the outgoing arguments - -If the stack frame is allocated in one chunk, the allocation needs a -probe if the frame size is >= guard_size - 1KiB. In addition, if the -function is not a leaf function, it must probe an address no more than -1KiB above the outgoing SP. We ensured the second condition by - -(1) using single-chunk allocations for non-leaf functions only if - the link register save slot is within 512 bytes of the bottom - of the frame; and - -(2) using the link register save as a probe (meaning, for instance, - that it can't be individually shrink wrapped) - -If instead the stack is allocated in multiple chunks, then: - -* an allocation involving only the outgoing arguments (C above) requires - a probe if the allocation size is > 1KiB - -* any other allocation requires a probe if the allocation size - is >= guard_size - 1KiB - -* second and subsequent allocations require the previous allocation - to probe at the bottom of the allocated area, regardless of the size - of that previous allocation - -The final point means that, unlike for single allocations, -it can be necessary to have both a non-SVE register probe and -an SVE register probe. For example: - -* allocate A, probe using a non-SVE register save -* allocate B, probe using an SVE register save -* allocate C - -The non-SVE register used in this case was again the link register. -It was previously used even if the link register save slot was some -bytes above the bottom of the non-SVE register saves, but an earlier -patch avoided that by putting the link register save slot first. - -As a belt-and-braces fix, this patch explicitly records which -probe registers we're using and allows the non-SVE probe to be -whichever register comes first (as for SVE). - -The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) - (aarch64_frame::hard_fp_save_and_probe): New fields. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. - Rather than asserting that a leaf function saves LR, instead assert - that a leaf function saves something. - (aarch64_get_separate_components): Prevent the chosen probe - registers from being individually shrink-wrapped. - (aarch64_allocate_and_probe_stack_space): Remove workaround for - probe registers that aren't at the bottom of the previous allocation. - -gcc/testsuite/ - * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. ---- - gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++---- - gcc/config/aarch64/aarch64.h | 8 +++ - .../aarch64/sve/pcs/stack_clash_3.c | 6 +- - 3 files changed, 64 insertions(+), 18 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 8abf3d7a1e2b..a8d907df8843 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void) - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offsetregno = SLOT_REQUIRED; - -- /* With stack-clash, LR must be saved in non-leaf functions. 
The saving of -- LR counts as an implicit probe which allows us to maintain the invariant -- described in the comment at expand_prologue. */ -- gcc_assert (crtl->is_leaf -- || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - - poly_int64 offset = crtl->outgoing_args_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); - frame.bytes_below_saved_regs = offset; -+ frame.sve_save_and_probe = INVALID_REGNUM; - - /* Now assign stack slots for the registers. Start with the predicate - registers, since predicate LDR and STR have a relatively small -@@ -8226,6 +8222,8 @@ aarch64_layout_frame (void) - for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.sve_save_and_probe == INVALID_REGNUM) -+ frame.sve_save_and_probe = regno; - frame.reg_offsetregno = offset; - offset += BYTES_PER_SVE_PRED; - } -@@ -8263,6 +8261,8 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.sve_save_and_probe == INVALID_REGNUM) -+ frame.sve_save_and_probe = regno; - frame.reg_offsetregno = offset; - offset += vector_save_size; - } -@@ -8272,10 +8272,18 @@ aarch64_layout_frame (void) - frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; - bool saves_below_hard_fp_p - = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ gcc_assert (!saves_below_hard_fp_p -+ || (frame.sve_save_and_probe != INVALID_REGNUM -+ && known_eq (frame.reg_offsetframe.sve_save_and_probe, -+ frame.bytes_below_saved_regs))); -+ - frame.bytes_below_hard_fp = offset; -+ frame.hard_fp_save_and_probe = INVALID_REGNUM; - - auto allocate_gpr_slot = &(unsigned int regno) - { -+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -+ frame.hard_fp_save_and_probe = regno; - frame.reg_offsetregno = offset; - if (frame.wb_push_candidate1 == INVALID_REGNUM) - frame.wb_push_candidate1 = regno; -@@ -8309,6 +8317,8 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) - { -+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -+ frame.hard_fp_save_and_probe = regno; - /* If there is an alignment gap between integer and fp callee-saves, - allocate the last fp register to it if possible. */ - if (regno == last_fp_reg -@@ -8332,6 +8342,17 @@ aarch64_layout_frame (void) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; -+ gcc_assert (known_eq (frame.saved_regs_size, -+ frame.below_hard_fp_saved_regs_size) -+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM -+ && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, -+ frame.bytes_below_hard_fp))); -+ -+ /* With stack-clash, a register must be saved in non-leaf functions. -+ The saving of the bottommost register counts as an implicit probe, -+ which allows us to maintain the invariant described in the comment -+ at expand_prologue. */ -+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); - - offset += get_frame_size (); - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8462,6 +8483,25 @@ aarch64_layout_frame (void) - frame.final_adjust = frame.bytes_below_saved_regs; - } - -+ /* The frame is allocated in pieces, with each non-final piece -+ including a register save at offset 0 that acts as a probe for -+ the following piece. 
In addition, the save of the bottommost register -+ acts as a probe for callees and allocas. Roll back any probes that -+ aren't needed. -+ -+ A probe isn't needed if it is associated with the final allocation -+ (including callees and allocas) that happens before the epilogue is -+ executed. */ -+ if (crtl->is_leaf -+ && !cfun->calls_alloca -+ && known_eq (frame.final_adjust, 0)) -+ { -+ if (maybe_ne (frame.sve_callee_adjust, 0)) -+ frame.sve_save_and_probe = INVALID_REGNUM; -+ else -+ frame.hard_fp_save_and_probe = INVALID_REGNUM; -+ } -+ - /* Make sure the individual adjustments add up to the full frame size. */ - gcc_assert (known_eq (frame.initial_adjust - + frame.callee_adjust -@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void) - - poly_int64 offset = frame.reg_offsetregno; - -- /* If the register is saved in the first SVE save slot, we use -- it as a stack probe for -fstack-clash-protection. */ -- if (flag_stack_clash_protection -- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) -- && known_eq (offset, frame.bytes_below_saved_regs)) -- continue; -- - /* Get the offset relative to the register we'll use. */ - if (frame_pointer_needed) - offset -= frame.bytes_below_hard_fp; -@@ -9080,6 +9113,13 @@ aarch64_get_separate_components (void)
View file
_service:tar_scm:0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
Added
@@ -0,0 +1,273 @@ +From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: PATCH 015/157 BackportSME aarch64: Avoid redundancy in + aarch64-cores.def + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c + +The flags fields of the aarch64-cores.def always start with +AARCH64_FL_FOR_<ARCH>. After previous changes, <ARCH> is always +identical to the previous field, so we can drop the explicit +AARCH64_FL_FOR_<ARCH> and derive it programmatically. + +This isn't a big saving in itself, but it helps with later patches. + +gcc/ + * config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH> + from the flags field. + * common/config/aarch64/aarch64-common.cc (all_cores): Add it + here instead. + * config/aarch64/aarch64.cc (all_cores): Likewise. + * config/aarch64/driver-aarch64.cc (all_cores): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 2 +- + 4 files changed, 68 insertions(+), 68 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 6ca89d31f..a965ac660 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -251,7 +251,7 @@ struct arch_to_arch_name + static const struct processor_name_to_arch all_cores = + { + #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, ++ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, + #include "config/aarch64/aarch64-cores.def" + {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, + {"", aarch64_no_arch, 0} +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index f4c2f4ea4..008b0b8c1 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) ++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, 
AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
View file
_service:tar_scm:0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch
Added
@@ -0,0 +1,43 @@
+From 167a3f34b308d3d56e816559701c3fb1c4f88c7b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 25 Oct 2024 03:30:35 +0000
+Subject: [PATCH 114/188] LoongArch: Remove constraint z from movsi_internal
+
+We don't allow SImode in FCC, so constraint z is never really used
+here.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (movsi_internal): Remove
+	constraint z.
+---
+ gcc/config/loongarch/loongarch.md | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 4c7e28ace..23d8dc126 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2197,8 +2197,8 @@
+ })
+ 
+ (define_insn_and_split "*movsi_internal"
+-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z")
+-	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
++  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,f,*r,*m")
++	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))]
+   "(register_operand (operands[0], SImode)
+     || reg_or_0_operand (operands[1], SImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+@@ -2211,7 +2211,7 @@
+   DONE;
+ }
+   "
+-  [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
++  [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
+    (set_attr "mode" "SI")])
+ 
+ ;; 16-bit Integer moves
+-- 
+2.43.0
+
View file
_service:tar_scm:0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
Deleted
@@ -1,157 +0,0 @@ -From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:56 +0100 -Subject: PATCH aarch64: Remove below_hard_fp_saved_regs_size - -After previous patches, it's no longer necessary to store -saved_regs_size and below_hard_fp_saved_regs_size in the frame info. -All measurements instead use the top or bottom of the frame as -reference points. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) - (aarch64_frame::below_hard_fp_saved_regs_size): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. ---- - gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 7 ------ - 2 files changed, 21 insertions(+), 31 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index a8d907df8843..ac3d3b336a37 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -- bool saves_below_hard_fp_p -- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); - gcc_assert (!saves_below_hard_fp_p - || (frame.sve_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offsetframe.sve_save_and_probe, -@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset - frame.bytes_below_saved_regs; -- gcc_assert (known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size) -+ auto saved_regs_size = offset - frame.bytes_below_saved_regs; -+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) - || (frame.hard_fp_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, - frame.bytes_below_hard_fp))); -@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void) - The saving of the bottommost register counts as an implicit probe, - which allows us to maintain the invariant described in the comment - at expand_prologue. 
*/ -- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); -+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - - offset += get_frame_size (); - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; -- if (known_eq (frame.saved_regs_size, 0)) -+ if (known_eq (saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void) - frame.callee_adjust = const_size; - } - else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) -- && frame.saved_regs_size.is_constant (&const_saved_regs_size) -+ && saved_regs_size.is_constant (&const_saved_regs_size) - && const_below_saved_regs + const_saved_regs_size < 512 - /* We could handle this case even with data below the saved - registers, provided that that data left us with valid offsets -@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void) - frame.initial_adjust = frame.frame_size; - } - else if (saves_below_hard_fp_p -- && known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size)) -+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) - { - /* Frame in which all saves are SVE saves: - -@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_above_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else -@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.bytes_above_hard_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - -@@ -9621,17 +9618,17 @@ aarch64_epilogue_uses (int regno) - | local variables | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | \ -- +-------------------------------+ | -- | callee-saved registers | | frame.saved_regs_size -- +-------------------------------+ | -- | LR' | | -- +-------------------------------+ | -- | FP' | | -- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) -- | SVE vector registers | | \ -- +-------------------------------+ | | below_hard_fp_saved_regs_size -- | SVE predicate registers | / / -+ | padding | -+ +-------------------------------+ -+ | callee-saved registers | -+ +-------------------------------+ -+ | LR' | -+ +-------------------------------+ -+ | FP' | -+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) -+ | SVE vector registers | -+ +-------------------------------+ -+ | SVE predicate registers | - +-------------------------------+ - | dynamic allocation | - +-------------------------------+ -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 46d4693e2064..01f7751bc783 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - HOST_WIDE_INT saved_varargs_size; - -- /* The size of the callee-save registers with a slot in REG_OFFSET. 
*/ -- poly_int64 saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the bottom of the register save area. - This value is always a multiple of STACK_BOUNDARY. */ - poly_int64 bytes_below_saved_regs; - -- /* The size of the callee-save registers with a slot in REG_OFFSET that -- are saved below the hard frame pointer. */ -- poly_int64 below_hard_fp_saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the hard frame pointer. This value is - always a multiple of STACK_BOUNDARY. */ --- -2.43.5 -
View file
_service:tar_scm:0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
Added
@@ -0,0 +1,83 @@
+From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:53 +0100
+Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4
+ [PR107025]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957
+
+AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated
+entry in aarch64-option-extensions.def. This means that, although
+it is internally separated from AARCH64_FL_V8_4A, there is no
+mechanism for turning it on and off individually, independently
+of armv8.4-a.
+
+The only place that the flag was used independently was in the
+entry for thunderx3t110, which enabled it alongside V8_3A.
+As noted in PR107025, this means that any use of the extension
+will fail to assemble.
+
+In the PR trail, Andrew suggested removing the core entry.
+That might be best long-term, but since the barrier for removing
+command-line options without a deprecation period is very high,
+this patch instead just drops the flag from the core entry.
+We'll still produce correct code.
+
+gcc/
+	PR target/107025
+	* config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete.
+	(AARCH64_FL_FOR_V8_4A): Update accordingly.
+	(AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly.
+	* config/aarch64/aarch64-cores.def (thunderx3t110): Remove
+	AARCH64_FL_RCPC8_4.
+---
+ gcc/config/aarch64/aarch64-cores.def | 2 +-
+ gcc/config/aarch64/aarch64.h         | 5 ++---
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index 008b0b8c1..cf500d0a9 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F
+ /* ARMv8.3-A Architecture Processors. */
+
+ /* Marvell cores (TX3). */
+-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a)
+
+ /* ARMv8.4-A Architecture Processors. */
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 918a14193..f4e0cd148 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -173,7 +173,6 @@
+ #define AARCH64_FL_SM4	      (1 << 17)  /* Has ARMv8.4-A SM3 and SM4. */
+ #define AARCH64_FL_SHA3	      (1 << 18)  /* Has ARMv8.4-a SHA3 and SHA512. */
+ #define AARCH64_FL_F16FML     (1 << 19)  /* Has ARMv8.4-a FP16 extensions. */
+-#define AARCH64_FL_RCPC8_4    (1 << 20)  /* Has ARMv8.4-a RCPC extensions. */
+
+ /* Statistical Profiling extensions. */
+ #define AARCH64_FL_PROFILE    (1 << 21)
+@@ -265,7 +264,7 @@
+   (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
+ #define AARCH64_FL_FOR_V8_4A \
+   (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
+-   | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
++   | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM)
+ #define AARCH64_FL_FOR_V8_5A \
+   (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
+    | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
+@@ -313,7 +312,7 @@
+ #define AARCH64_ISA_SM4	   (aarch64_isa_flags & AARCH64_FL_SM4)
+ #define AARCH64_ISA_SHA3	   (aarch64_isa_flags & AARCH64_FL_SHA3)
+ #define AARCH64_ISA_F16FML	   (aarch64_isa_flags & AARCH64_FL_F16FML)
+-#define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
++#define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_V8_4A)
+ #define AARCH64_ISA_RNG	   (aarch64_isa_flags & AARCH64_FL_RNG)
+ #define AARCH64_ISA_V8_5A	   (aarch64_isa_flags & AARCH64_FL_V8_5A)
+ #define AARCH64_ISA_TME	   (aarch64_isa_flags & AARCH64_FL_TME)
+-- 
+2.33.0
+
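For illustration only, a standalone sketch of what the last hunk above does: the RCPC8_4 query now simply tests the architecture bit, so armv8.4-a implies the feature. Flag values here are invented, not GCC's real bit assignments.

    #include <cstdint>
    #include <cassert>

    // Hypothetical flag bits standing in for aarch64_isa_flags; the real
    // values live in gcc/config/aarch64/aarch64.h.
    constexpr uint64_t FL_V8_4A  = 1ull << 7;
    constexpr uint64_t FL_F16FML = 1ull << 19;

    int main ()
    {
      // Before the patch RCPC8_4 had its own bit; afterwards any query
      // for it tests the architecture bit instead.
      uint64_t isa_flags = FL_V8_4A | FL_F16FML;
      bool isa_rcpc8_4 = isa_flags & FL_V8_4A;  // was: isa_flags & FL_RCPC8_4
      assert (isa_rcpc8_4);
      return 0;
    }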
View file
_service:tar_scm:0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch
Added
@@ -0,0 +1,47 @@ +From 0929961b9dd57e0dd18e4cccc6ba760706e74f77 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 17 Jan 2024 09:24:06 +0800 +Subject: PATCH 115/188 LoongArch: doc: Add attribute descriptions defined in + the target-supports.exp. + +gcc/ChangeLog: + + * doc/sourcebuild.texi: Add attributes for keywords. +--- + gcc/doc/sourcebuild.texi | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 71c04841d..a1ab0a1cb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2292,6 +2292,26 @@ AArch64 target that is able to generate and execute armv8.3-a FJCVTZS + instruction. + @end table + ++@subsubsection LoongArch specific attributes ++ ++@table @code ++@item loongarch_sx ++LoongArch target that generates instructions for SX. ++ ++@item loongarch_asx ++LoongArch target that generates instructions for ASX. ++ ++@item loongarch_sx_hw ++LoongArch target that is able to generate and execute SX code. ++ ++@item loongarch_asx_hw ++LoongArch target that is able to generate and execute ASX code. ++ ++@item loongarch_call36_support ++LoongArch binutils supports call36 relocation. ++ ++@end table ++ + @subsubsection MIPS-specific attributes + + @table @code +-- +2.43.0 +
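A hypothetical testsuite file showing how these documented keywords are consumed; the dg-require-effective-target directive is standard DejaGnu usage, while the file and test body are invented for illustration.

    /* { dg-do run } */
    /* { dg-require-effective-target loongarch_asx_hw } */
    /* The test only runs where the toolchain can generate ASX code and
       the executing hardware actually supports it.  */
    int main (void) { return 0; }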
View file
_service:tar_scm:0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
Deleted
@@ -1,298 +0,0 @@ -From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:57 +0100 -Subject: PATCH aarch64: Make stack smash canary protect saved registers - -AArch64 normally puts the saved registers near the bottom of the frame, -immediately above any dynamic allocations. But this means that a -stack-smash attack on those dynamic allocations could overwrite the -saved registers without needing to reach as far as the stack smash -canary. - -The same thing could also happen for variable-sized arguments that are -passed by value, since those are allocated before a call and popped on -return. - -This patch avoids that by putting the locals (and thus the canary) below -the saved registers when stack smash protection is active. - -The patch fixes CVE-2023-4039. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): - New function. - (aarch64_layout_frame): Use it to decide whether locals should - go above or below the saved registers. - (aarch64_expand_prologue): Update stack layout comment. - Emit a stack tie after the final adjustment. - -gcc/testsuite/ - * gcc.target/aarch64/stack-protector-8.c: New test. - * gcc.target/aarch64/stack-protector-9.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 46 +++++++-- - .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ - .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ - 3 files changed, 168 insertions(+), 6 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ac3d3b336a37..96c3f48fdc49 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void) - return aarch64_use_frame_pointer; - } - -+/* Return true if the current function should save registers above -+ the locals area, rather than below it. */ -+ -+static bool -+aarch64_save_regs_above_locals_p () -+{ -+ /* When using stack smash protection, make sure that the canary slot -+ comes between the locals and the saved registers. Otherwise, -+ it would be possible for a carefully sized smash attack to change -+ the saved registers (particularly LR and FP) without reaching the -+ canary. */ -+ return crtl->stack_protect_guard; -+} -+ - /* Mark the registers that need to be saved by the callee and calculate - the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). 
*/ -@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void) - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); - bool frame_related_fp_reg_p = false; - aarch64_frame &frame = cfun->machine->frame; -+ poly_int64 top_of_locals = -1; - - frame.emit_frame_chain = aarch64_needs_frame_chain (); - -@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void) - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offsetregno = SLOT_REQUIRED; - -+ bool regs_at_top_p = aarch64_save_regs_above_locals_p (); - - poly_int64 offset = crtl->outgoing_args_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ if (regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - frame.bytes_below_saved_regs = offset; - frame.sve_save_and_probe = INVALID_REGNUM; - -@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void) - at expand_prologue. */ - gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - -- offset += get_frame_size (); -- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -- auto top_of_locals = offset; -- -+ if (!regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - offset += frame.saved_varargs_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); - frame.frame_size = offset; - - frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ gcc_assert (known_ge (top_of_locals, 0)); - frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; -@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno) - | for register varargs | - | | - +-------------------------------+ -- | local variables | <-- frame_pointer_rtx -+ | local variables (1) | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | -+ | padding (1) | - +-------------------------------+ - | callee-saved registers | - +-------------------------------+ -@@ -9630,6 +9655,10 @@ aarch64_epilogue_uses (int regno) - +-------------------------------+ - | SVE predicate registers | - +-------------------------------+ -+ | local variables (2) | -+ +-------------------------------+ -+ | padding (2) | -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ - | padding | -@@ -9639,6 +9668,9 @@ aarch64_epilogue_uses (int regno) - +-------------------------------+ - | | <-- stack_pointer_rtx (aligned) - -+ The regions marked (1) and (2) are mutually exclusive. (2) is used -+ when aarch64_save_regs_above_locals_p is true. -+ - Dynamic stack allocations via alloca() decrease stack_pointer_rtx - but leave frame_pointer_rtx and hard_frame_pointer_rtx - unchanged. -@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void) - gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); -+ if (emit_frame_chain && maybe_ne (final_adjust, 0)) -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - - /* Return TRUE if we can use a simple_return insn. 
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -new file mode 100644 -index 000000000000..e71d820e3654 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -@@ -0,0 +1,95 @@ -+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void g(void *); -+__SVBool_t *h(void *); -+ -+/* -+** test1: -+** sub sp, sp, #288 -+** stp x29, x30, \sp, #?272\ -+** add x29, sp, #?272 -+** mrs (x0-9+), tpidr2_el0 -+** ldr (x0-9+), \\1, #?16\ -+** str \2, \sp, #?264\ -+** mov \2, #?0 -+** add x0, sp, #?8 -+** bl g -+** ... -+** mrs .* -+** ... -+** bne .* -+** ... -+** ldp x29, x30, \sp, #?272\ -+** add sp, sp, #?288 -+** ret -+** bl __stack_chk_fail -+*/ -+int test1() { -+ int y0x40; -+ g(y); -+ return 1; -+} -+ -+/* -+** test2: -+** stp x29, x30, \sp, #?-16\! -+** mov x29, sp -+** sub sp, sp, #1040
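A small standalone sketch of the scenario the commit message describes, with invented function names; compiling with -fstack-protector-strong produces the canary check the patch is about.

    // With the old layout, an overflow of `buf` could reach the saved
    // FP/LR before it reached the stack-protector canary, so the smash
    // went undetected.  With locals (and hence the canary) placed below
    // the saved registers, the overwrite corrupts the canary first and
    // __stack_chk_fail fires.
    static void g (char *p)
    {
      p[0] = 1;        // imagine an out-of-bounds write past buf here
    }

    int f ()
    {
      char buf[64];    // local array adjacent to the register save area
      g (buf);
      return buf[0];   // canary checked in f's epilogue
    }

    int main () { return f (); }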
View file
_service:tar_scm:0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
Added
@@ -0,0 +1,154 @@ +From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: PATCH 017/157 BackportSME aarch64: Fix transitive closure of + features + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1 + +aarch64-option-extensions.def requires us to maintain the transitive +closure of options by hand. This patch fixes a few cases where a +flag was missed. + ++noaes and +nosha2 now disable +crypto, which IMO makes more +sense and is consistent with the Clang behaviour. + +gcc/ + * config/aarch64/aarch64-option-extensions.def (dotprod): Depend + on fp as well as simd. + (sha3): Likewise. + (aes): Likewise. Make +noaes disable crypto. + (sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and + sve2-sha3. + (sve2-sha3): Depend on sha2 as well as sha3. + +gcc/testsuite/ + * gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to + disable crypto but keep aes. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 16 ++++--- + .../gcc.target/aarch64/options_set_6.c | 5 +- + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++ + 3 files changed, 58 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b4d0ac8b6..b98008127 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ + + /* Enabling "dotprod" also enables "simd". + Disabling "dotprod" only disables "dotprod". */ +-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ ++AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ + false, "asimddp") + + /* Enabling "aes" also enables "simd". + Disabling "aes" disables "aes" and "sve2-aes'. */ +-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ +- AARCH64_FL_SVE2_AES, false, "aes") ++AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") + + /* Enabling "sha2" also enables "simd". + Disabling "sha2" just disables "sha2". */ +-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ +- "sha1 sha2") ++AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + + /* Enabling "sha3" enables "simd" and "sha2". + Disabling "sha3" disables "sha3" and "sve2-sha3". */ +-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ ++AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ + AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ + "sha3 sha512") + + /* Enabling "sm4" also enables "simd". + Disabling "sm4" disables "sm4" and "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ ++AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ + AARCH64_FL_SVE2_SM4, false, "sm3 sm4") + + /* Enabling "fp16fml" also enables "fp" and "fp16". +@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ + /* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". 
*/ + AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SHA2 | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") + +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +index 90a055928..2a1d7fe5b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +@@ -6,7 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ + +-/* Group as a whole was requested to be turned on, crypto itself is a bit and so +- just turning off one feature can't turn it off. */ ++/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +new file mode 100644 +index 000000000..0e6461fa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -0,0 +1,47 @@ ++#pragma GCC target "+nothing+dotprod" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+aes" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha2" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha3" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sm4" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+noaes" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+nosha2" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sve2-sha3" ++#ifndef __ARM_FEATURE_SHA2 ++#error Foo ++#endif ++ ++#pragma GCC target "+sve2-sha3+nosha2" ++#ifdef __ARM_FEATURE_SHA3 ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_SVE2_SHA3 ++#error Foo ++#endif +-- +2.33.0 +
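A minimal sketch of the invariant being repaired, the transitive closure of feature dependencies, with invented feature bits and an explicit fixpoint loop; the .def file encodes the same closure by hand, which is why entries like these can drift out of sync.

    #include <cstdint>
    #include <cassert>

    constexpr uint64_t FP = 1, SIMD = 2, SHA2 = 4, SHA3 = 8, SVE2_SHA3 = 16;

    struct dep { uint64_t feature, needs; };
    constexpr dep deps[] = {
      { SIMD, FP }, { SHA2, SIMD }, { SHA3, SHA2 | SIMD },
      { SVE2_SHA3, SHA3 | SHA2 },
    };

    // Repeatedly OR in the dependencies of every enabled feature until
    // the set stops growing.
    uint64_t close_over (uint64_t flags)
    {
      uint64_t prev;
      do {
        prev = flags;
        for (const dep &d : deps)
          if (flags & d.feature)
            flags |= d.needs;
      } while (flags != prev);
      return flags;
    }

    int main ()
    {
      // +sve2-sha3 must pull in sha3, sha2, simd and fp transitively.
      assert (close_over (SVE2_SHA3) == (SVE2_SHA3 | SHA3 | SHA2 | SIMD | FP));
      return 0;
    }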
View file
_service:tar_scm:0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch
Added
@@ -0,0 +1,70 @@ +From c0b63b89a03c11bf6383f0175b60614d73295463 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 22 Jan 2024 18:07:42 +0800 +Subject: PATCH 116/188 LoongArch: Disable explicit reloc for TLS LD/GD with + -mexplicit-relocs=auto + +Binutils 2.42 supports TLS LD/GD relaxation which requires the assembler +macro. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + If la_opt_explicit_relocs is EXPLICIT_RELOCS_AUTO, return false + for SYMBOL_TLS_LDM and SYMBOL_TLS_GD. + (loongarch_call_tls_get_addr): Do not split symbols of + SYMBOL_TLS_LDM or SYMBOL_TLS_GD if la_opt_explicit_relocs is + EXPLICIT_RELOCS_AUTO. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Check + for la.tls.ld and la.tls.gd. +--- + gcc/config/loongarch/loongarch.cc | 10 +++++----- + .../loongarch/explicit-relocs-auto-tls-ld-gd.c | 3 ++- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7da00c132..5f22b9dd8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1967,11 +1967,11 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + { + case SYMBOL_TLS_IE: + case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL64: +- /* The linker don't know how to relax TLS accesses or 64-bit +- pc-relative accesses. */ ++ /* TLS IE cannot be relaxed. TLS LE relaxation is different from ++ the normal R_LARCH_RELAX-based relaxation and it **requires** ++ using the explicit %le_{lo12,hi20,add}_r relocs. The linker ++ does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: + /* The linker don't know how to relax GOT accesses in extreme +@@ -2785,7 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) + { + /* Split tls symbol to high and low. */ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index 957ff98df..ca55fcfc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -6,4 +6,5 @@ extern __thread int b __attribute__((visibility("default"))); + + int test() { return a + b; } + +-/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.ld" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.gd" { target tls_native } } } */ +-- +2.43.0 +
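A minimal C example of the accesses involved; under -fPIC these typically use the local-dynamic and general-dynamic sequences (la.tls.ld / la.tls.gd) that this patch keeps as assembler macros under -mexplicit-relocs=auto, so binutils >= 2.42 can relax them.

    static __thread int local_counter;   // typically local-dynamic in a DSO
    __thread int shared_counter;         // typically general-dynamic when preemptible

    int bump () { return ++local_counter + ++shared_counter; }

    int main () { return bump () == 2 ? 0 : 1; }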
View file
_service:tar_scm:0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
Deleted
@@ -1,66 +0,0 @@
-From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Wed, 20 Sep 2023 11:13:19 +0100
-Subject: [PATCH] aarch64: Fix return register handling in untyped_call
-
-While working on another patch, I hit a problem with the aarch64
-expansion of untyped_call. The expander emits the usual:
-
-  (set (mem ...) (reg resN))
-
-instructions to store the result registers to memory, but it didn't
-say in RTL where those resN results came from. This eventually led
-to a failure of gcc.dg/torture/stackalign/builtin-return-2.c,
-via regrename.
-
-This patch turns the untyped call from a plain call to a call_value,
-to represent that the call returns (or might return) a useful value.
-The patch also uses a PARALLEL return rtx to represent all the possible
-return registers.
-
-gcc/
-	* config/aarch64/aarch64.md (untyped_call): Emit a call_value
-	rather than a call. List each possible destination register
-	in the call pattern.
-
-(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f)
----
- gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++-
- 1 file changed, 19 insertions(+), 1 deletion(-)
-
-diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
-index b9b3ba4f9164..cd568f3daa0f 100644
---- a/gcc/config/aarch64/aarch64.md
-+++ b/gcc/config/aarch64/aarch64.md
-@@ -1159,9 +1159,27 @@
- {
-   int i;
-
-+  /* Generate a PARALLEL that contains all of the register results.
-+     The offsets are somewhat arbitrary, since we don't know the
-+     actual return type. The main thing we need to avoid is having
-+     overlapping byte ranges, since those might give the impression
-+     that two registers are known to have data in common. */
-+  rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0));
-+  poly_int64 offset = 0;
-+  for (i = 0; i < XVECLEN (operands[2], 0); i++)
-+    {
-+      rtx reg = SET_SRC (XVECEXP (operands[2], 0, i));
-+      gcc_assert (REG_P (reg));
-+      rtx offset_rtx = gen_int_mode (offset, Pmode);
-+      rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx);
-+      RTVEC_ELT (rets, i) = piece;
-+      offset += GET_MODE_SIZE (GET_MODE (reg));
-+    }
-+  rtx ret = gen_rtx_PARALLEL (VOIDmode, rets);
-+
-   /* Untyped calls always use the default ABI. It's only possible to use
-      ABI variants if we know the type of the target function. */
--  emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
-+  emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx));
-
-   for (i = 0; i < XVECLEN (operands[2], 0); i++)
-     {
--- 
-2.43.5
-
View file
_service:tar_scm:0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
Added
@@ -0,0 +1,194 @@ +From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: PATCH 018/157 BackportSME aarch64: Reorder an entry in + aarch64-option-extensions.def + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766 + +aarch64-option-extensions.def was topologically sorted except +for one case: crypto came before its aes and sha2 dependencies. +This patch moves crypto after sha2 instead. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Move crypto + after sha2. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto + to come after +crc. + * gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise. + * gcc.target/aarch64/options_set_2.c: Likewise. + * gcc.target/aarch64/options_set_3.c: Likewise. + * gcc.target/aarch64/options_set_4.c: Likewise. +--- + .../aarch64/aarch64-option-extensions.def | 20 +++++++++---------- + .../aarch64/cpunative/native_cpu_0.c | 2 +- + .../aarch64/cpunative/native_cpu_13.c | 2 +- + .../aarch64/cpunative/native_cpu_16.c | 2 +- + .../aarch64/cpunative/native_cpu_17.c | 2 +- + .../aarch64/cpunative/native_cpu_6.c | 2 +- + .../aarch64/cpunative/native_cpu_7.c | 2 +- + .../gcc.target/aarch64/options_set_2.c | 2 +- + .../gcc.target/aarch64/options_set_3.c | 2 +- + .../gcc.target/aarch64/options_set_4.c | 4 ++-- + 10 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b98008127..df2c8d19b 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ + AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ + false, "asimd") + +-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". +- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", +- "sve2-aes", "sve2-sha3", "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ +- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ +- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ +- "aes pmull sha1 sha2") +- + /* Enabling or disabling "crc" only changes "crc". */ + AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") + +@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ + AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ + AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + ++/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". ++ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", ++ "sve2-aes", "sve2-sha3", "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ ++ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ ++ "aes pmull sha1 sha2") ++ + /* Enabling "sha3" enables "simd" and "sha2". 
+ Disabling "sha3" disables "sha3" and "sve2-sha3". */ + AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +index f155f51ba..8499f87c3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +index b7b3a8e13..551669091 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test one with mixed order of feature bits. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +index a424e7c56..2f963bb23 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +index c269c5fef..c68a697aa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +index da72052e6..7608e8845 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the feature bits for crypto and fp16 are given in + same order as declared in options file. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +index 96ad4c14d..72b14b4f6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the crypto and fp16 options are specified in different + order from what is in the options file. 
*/ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +index 3476febce..f82cb5f78 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check to see if crc and crypto are maintained if crypto specified. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +index 4558339f1..7d350cfa3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if smallest set is maintained when outputting. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +index 15514bfe9..5370e02e1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +@@ -6,7 +6,7 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if individual bits that make up a grouping is specified that only the +- grouping is kept. */ +\ No newline at end of file ++ grouping is kept. */ +-- +2.33.0 +
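A sketch of the property the reordering restores: every entry's dependencies must appear earlier in the .def list. Feature bits and the table are invented; only the ordering check mirrors the real constraint.

    #include <cstdint>
    #include <cassert>

    constexpr uint64_t FP = 1, SIMD = 2, AES = 4, SHA2 = 8, CRYPTO = 16;

    struct ext { uint64_t flag, deps; };
    // "crypto" now comes after "aes" and "sha2", its dependencies.
    constexpr ext table[] = {
      { FP, 0 }, { SIMD, FP }, { AES, SIMD }, { SHA2, SIMD },
      { CRYPTO, FP | SIMD | AES | SHA2 },
    };

    int main ()
    {
      uint64_t seen = 0;
      for (const ext &e : table)
        {
          assert ((e.deps & ~seen) == 0);  // all deps defined earlier
          seen |= e.flag;
        }
      return 0;
    }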
View file
_service:tar_scm:0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch
Added
@@ -0,0 +1,35 @@ +From 7e10f7b95a598e9471bd1bc77454af43a69eb506 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 23 Jan 2024 19:32:38 +0800 +Subject: PATCH 117/188 LoongArch: testsuite: Disable stack protector for + got-load.C + +When building GCC with --enable-default-ssp, the stack protector is +enabled for got-load.C, causing additional GOT loads for +__stack_chk_guard. So mem/u will be matched more than 2 times and the +test will fail. + +Disable stack protector to fix this issue. + +gcc/testsuite: + + * g++.target/loongarch/got-load.C (dg-options): Add + -fno-stack-protector. +--- + gcc/testsuite/g++.target/loongarch/got-load.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C +index 20924c739..17870176a 100644 +--- a/gcc/testsuite/g++.target/loongarch/got-load.C ++++ b/gcc/testsuite/g++.target/loongarch/got-load.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */ ++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand -fno-stack-protector" } */ + /* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */ + + #include <bits/stdc++.h> +-- +2.43.0 +
View file
_service:tar_scm:0117-aarch64-Fix-loose-ldpstp-check.patch
Deleted
@@ -1,119 +0,0 @@ -From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Wed, 20 Sep 2023 11:13:20 +0100 -Subject: PATCH aarch64: Fix loose ldpstp check PR111411 - -aarch64_operands_ok_for_ldpstp contained the code: - - /* One of the memory accesses must be a mempair operand. - If it is not the first one, they need to be swapped by the - peephole. */ - if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) - && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) - return false; - -But the requirement isn't just that one of the accesses must be a -valid mempair operand. It's that the lower access must be, since -that's the access that will be used for the instruction operand. - -gcc/ - PR target/111411 - * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require - the lower memory access to a mem-pair operand. - -gcc/testsuite/ - PR target/111411 - * gcc.dg/rtl/aarch64/pr111411.c: New test. - -(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) ---- - gcc/config/aarch64/aarch64.cc | 8 ++- - gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ - 2 files changed, 60 insertions(+), 5 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 96c3f48fdc49..a979accd90a9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), - GET_MODE_SIZE (GET_MODE (mem_2)))); - -- /* One of the memory accesses must be a mempair operand. -- If it is not the first one, they need to be swapped by the -- peephole. */ -- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) -- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) -+ /* The lower memory access must be a mem-pair operand. */ -+ rtx lower_mem = reversed ? 
mem_2 : mem_1; -+ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) - return false; - - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) -diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -new file mode 100644 -index 000000000000..ad07e9c6c893 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -@@ -0,0 +1,57 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-require-effective-target lp64 } */ -+/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ -+ -+extern int data; -+ -+void __RTL (startwith ("ira")) foo (void *ptr) -+{ -+ (function "foo" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} -+ -+void __RTL (startwith ("ira")) bar (void *ptr) -+{ -+ (function "bar" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} --- -2.43.5 -
View file
_service:tar_scm:0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
Added
@@ -0,0 +1,1176 @@ +From deb18d5083d8f9edbdafac184c010a6720dc8dda Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: PATCH 019/157 BackportSME aarch64: Simplify feature definitions + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11a113d501ff64fa4843e28d0a21b3f4e9d0d3de + +Currently the aarch64-option-extensions.def entries, the +aarch64-cores.def entries, and the AARCH64_FL_FOR_* macros +have a transitive closure of dependencies that is maintained by hand. +This is a bit error-prone and is becoming less tenable as more features +are added. The main point of this patch is to maintain the closure +automatically instead. + +For example, the +sve2-aes extension requires sve2 and aes. +This is now described using: + + AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), ...) + +If life was simple, we could just give the name of the feature +and the list of features that it requires/depends on. But sadly +things are more complicated. For example: + +- the legacy +crypto option enables aes and sha2 only, but +nocrypto + disables all crypto-related extensions, including sm4. + +- +fp16fml enables fp16, but armv8.4-a enables fp16fml without fp16. + fp16fml only has an effect when fp16 is also present; see the + comments for more details. + +- +bf16 enables simd, but +bf16+nosimd is valid and enables just the + scalar bf16 instructions. rdma behaves similarly. + +To handle cases like these, the option entries have extra fields to +specify what an explicit +foo enables and what an explicit +nofoo +disables, in addition to the absolute dependencies. + +The other main changes are: + +- AARCH64_FL_* are now defined automatically. + +- the feature list for each architecture level moves from aarch64.h + to aarch64-arches.def. + +As a consequence, we now have a (redundant) V8A feature flag. + +While there, the patch uses a new typedef, aarch64_feature_flags, +for the set of feature flags. This should make it easier to switch +to a class if we run out of bits in the uint64_t. + +For now the patch hardcodes the fact that crypto is the only +synthetic option. A later patch will remove this field. + +To test for things that might not be covered by the testsuite, +I made the driver print out the all_extensions, all_cores and +all_archs arrays before and after the patch, with the following +tweaks: + +- renumber the old AARCH64_FL_* bit assignments to match the .def order +- remove the new V8A flag when printing the new tables +- treat CRYPTO and CRYPTO | AES | SHA2 the same way when printing the + core tables + +(On the last point: some cores enabled just CRYPTO while others enabled +CRYPTO, AES and SHA2. This doesn't cause a difference in behaviour +because of how the dependent macros are defined. With the new scheme, +all entries with CRYPTO automatically get AES and SHA2 too.) + +The only difference is that +nofp now turns off dotprod. This was +another instance of an incomplete transitive closure, but unlike the +instances fixed in a previous patch, it had no observable effect. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Switch to a new format. + * config/aarch64/aarch64-cores.def: Use the same format to specify + lists of features. + * config/aarch64/aarch64-arches.def: Likewise, moving that information + from aarch64.h. + * config/aarch64/aarch64-opts.h (aarch64_feature_flags): New typedef. + * config/aarch64/aarch64.h (aarch64_feature): New class enum. 
+ Turn AARCH64_FL_* macros into constexprs, getting the definitions + from aarch64-option-extensions.def. Remove AARCH64_FL_FOR_* macros. + * common/config/aarch64/aarch64-common.cc: Include + aarch64-feature-deps.h. + (all_extensions): Update for new .def format. + (all_extensions_by_on, all_cores, all_architectures): Likewise. + * config/aarch64/driver-aarch64.cc: Include aarch64-feature-deps.h. + (aarch64_extensions): Update for new .def format. + (aarch64_cpu_data, aarch64_arches): Likewise. + * config/aarch64/aarch64.cc: Include aarch64-feature-deps.h. + (all_architectures, all_cores): Update for new .def format. + * config/aarch64/aarch64-sve-builtins.cc + (check_required_extensions): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 29 +- + gcc/config/aarch64/aarch64-arches.def | 28 +- + gcc/config/aarch64/aarch64-cores.def | 130 +++---- + gcc/config/aarch64/aarch64-feature-deps.h | 121 +++++++ + .../aarch64/aarch64-option-extensions.def | 323 +++++++----------- + gcc/config/aarch64/aarch64-opts.h | 4 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 5 +- + gcc/config/aarch64/aarch64.cc | 14 +- + gcc/config/aarch64/aarch64.h | 164 ++------- + gcc/config/aarch64/driver-aarch64.cc | 10 +- + 10 files changed, 374 insertions(+), 454 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-feature-deps.h + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index a965ac660..74729bb30 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -30,6 +30,7 @@ + #include "opts.h" + #include "flags.h" + #include "diagnostic.h" ++#include "config/aarch64/aarch64-feature-deps.h" + + #ifdef TARGET_BIG_ENDIAN_DEFAULT + #undef TARGET_DEFAULT_TARGET_FLAGS +@@ -214,9 +215,12 @@ struct aarch64_option_extension + /* ISA extensions in AArch64. */ + static const struct aarch64_option_extension all_extensions = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -225,9 +229,12 @@ static const struct aarch64_option_extension all_extensions = + bits and extension turned on. Cached for efficiency. */ + static struct aarch64_option_extension all_extensions_by_on = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -250,18 +257,18 @@ struct arch_to_arch_name + the default set of architectural feature flags they support. 
*/ + static const struct processor_name_to_arch all_cores = + { +-#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, ++#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \ ++ {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, ++ {"generic", AARCH64_ARCH_V8A, feature_deps::V8A ().enable}, + {"", aarch64_no_arch, 0} + }; + + /* Map architecture revisions to their string representation. */ + static const struct arch_to_arch_name all_architectures = + { +-#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH, FLAGS) \ +- {AARCH64_ARCH_##ARCH_IDENT, NAME, FLAGS}, ++#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \ ++ {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable}, + #include "config/aarch64/aarch64-arches.def" + {aarch64_no_arch, "", 0} + }; +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index ece96e22a..9f8246618 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + NAME should be kept as the first argument. */ + +-AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA))
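A rough standalone sketch of the constexpr-closure idea this patch introduces; the names, bits, and template shape are invented simplifications of what gcc/config/aarch64/aarch64-feature-deps.h actually does.

    #include <cstdint>

    constexpr uint64_t FL_FP = 1, FL_SIMD = 2, FL_SVE = 4, FL_SVE2 = 8;

    // Each feature exposes a compile-time `enable` mask equal to its own
    // bit plus the closure of its dependencies, so the transitive closure
    // no longer has to be maintained by hand.
    template<uint64_t Flag, uint64_t... DepEnables>
    struct feature
    {
      static constexpr uint64_t enable = (Flag | ... | DepEnables);
    };

    using FP   = feature<FL_FP>;
    using SIMD = feature<FL_SIMD, FP::enable>;
    using SVE  = feature<FL_SVE, SIMD::enable>;
    using SVE2 = feature<FL_SVE2, SVE::enable>;

    static_assert (SVE2::enable == (FL_SVE2 | FL_SVE | FL_SIMD | FL_FP),
                   "closure computed automatically");

    int main () { return 0; }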
View file
_service:tar_scm:0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch
Added
@@ -0,0 +1,65 @@ +From 8cab312a44efc4711fb7adad65f1314349295591 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 23 Jan 2024 11:28:09 +0800 +Subject: PATCH 118/188 LoongArch: Disable TLS type symbols from generating + non-zero offsets. + +TLS gd ld and ie type symbols will generate corresponding GOT entries, +so non-zero offsets cannot be generated. +The address of TLS le type symbol+addend is not implemented in binutils, +so non-zero offset is not generated here for the time being. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For symbols of type tls, non-zero Offset is not generated. +--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5f22b9dd8..9cdd4ed15 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1921,11 +1921,7 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + x = UNSPEC_ADDRESS (x); + } + else if (SYMBOL_REF_P (x) || LABEL_REF_P (x)) +- { +- *symbol_type = loongarch_classify_symbol (x); +- if (*symbol_type == SYMBOL_TLS) +- return true; +- } ++ *symbol_type = loongarch_classify_symbol (x); + else + return false; + +@@ -1936,17 +1932,21 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_TLS_IE: +- case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL: + case SYMBOL_PCREL64: + /* GAS rejects offsets outside the range -2^31, 2^31-1. */ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + ++ /* The following symbol types do not allow non-zero offsets. */ + case SYMBOL_GOT_DISP: ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: + case SYMBOL_TLS: ++ /* From an implementation perspective, tls_le symbols are allowed to ++ have non-zero offsets, but currently binutils has not added support, ++ so the generation of non-zero offsets is prohibited here. */ ++ case SYMBOL_TLS_LE: + return false; + } + gcc_unreachable (); +-- +2.43.0 +
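A minimal example of a TLS access with a non-zero addend, the case this patch stops folding into the relocation.

    // `arr[2]` is the TLS symbol plus 8 bytes.  For GOT-backed models
    // (GD/LD/IE) the addend cannot live in the relocation, and for LE
    // binutils support is missing, so the compiler now materialises the
    // base address first and adds the offset separately.
    __thread int arr[4];

    int get () { return arr[2]; }

    int main () { arr[2] = 42; return get () == 42 ? 0 : 1; }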
View file
_service:tar_scm:0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
Added
@@ -0,0 +1,467 @@ +From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:55 +0100 +Subject: PATCH 020/157 BackportSME aarch64: Simplify generation of .arch + strings + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d + +aarch64-common.cc has two arrays, one maintaining the original +definition order and one sorted by population count. Sorting +by population count was a way of ensuring topological ordering, +taking advantage of the fact that the entries are partially +ordered by the subset relation. However, the sorting is not +needed now that the .def file is forced to have topological +order from the outset. + +Other changes are: + +(1) The population count used: + + uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; + uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; + int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); + int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); + + where I think the & was supposed to be |. This meant that the + counts would always be 1 in practice, since flag_canonical is + a single bit. This led us to printing +nofp+nosimd even though + GCC "knows" (and GAS agrees) that +nofp disables simd. + +(2) The .arch output code converts +aes+sha2 to +crypto. I think + the main reason for doing this is to support assemblers that + predate the individual per-feature crypto flags. It therefore + seems more natural to treat it as a special case, rather than + as an instance of a general pattern. Hopefully we won't do + something similar in future! + + (There is already special handling of CRC, for different reasons.) + +(3) Previously, if the /proc/cpuinfo code saw a feature like sve, + it would assume the presence of all the features that sve + depends on. It would be possible to keep that behaviour + if necessary, but it was simpler to assume the presence of + fp16 (say) only when fphp is present. There's an argument + that that's more conservatively correct too. + +gcc/ + * common/config/aarch64/aarch64-common.cc + (TARGET_OPTION_INIT_STRUCT): Delete. + (aarch64_option_extension): Remove is_synthetic_flag. + (all_extensions): Update accordingly. + (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete. + (aarch64_option_init_struct, aarch64_contains_opt): Delete. + (aarch64_get_extension_string_for_isa_flags): Rewrite to use + all_extensions instead of all_extensions_on. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve. + * gcc.target/aarch64/cpunative/info_9: Likewise svesm4. + * gcc.target/aarch64/cpunative/info_15: Likewise. + * gcc.target/aarch64/cpunative/info_16: Likewise sve2. + * gcc.target/aarch64/cpunative/info_17: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp + rather than +nofp+nosimd. + * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise. + * gcc.target/aarch64/target_attr_15.c: Likewise. 
+--- + gcc/common/config/aarch64/aarch64-common.cc | 244 ++++-------------- + .../gcc.target/aarch64/cpunative/info_15 | 2 +- + .../gcc.target/aarch64/cpunative/info_16 | 2 +- + .../gcc.target/aarch64/cpunative/info_17 | 2 +- + .../gcc.target/aarch64/cpunative/info_8 | 2 +- + .../gcc.target/aarch64/cpunative/info_9 | 2 +- + .../aarch64/cpunative/native_cpu_10.c | 2 +- + .../aarch64/cpunative/native_cpu_2.c | 2 +- + .../gcc.target/aarch64/target_attr_15.c | 2 +- + 9 files changed, 55 insertions(+), 205 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 74729bb30..057dc094d 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -42,8 +42,6 @@ + + #undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table +-#undef TARGET_OPTION_INIT_STRUCT +-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct + + #define INVALID_IMP ((unsigned) -1) + +@@ -209,7 +207,6 @@ struct aarch64_option_extension + const uint64_t flag_canonical; + const uint64_t flags_on; + const uint64_t flags_off; +- const bool is_synthetic; + }; + + /* ISA extensions in AArch64. */ +@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions = + {NAME, AARCH64_FL_##IDENT, \ + feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ + feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, ++ & ~AARCH64_FL_##IDENT}, + #include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} +-}; +- +-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of +- bits and extension turned on. Cached for efficiency. */ +-static struct aarch64_option_extension all_extensions_by_on = +-{ +-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ +- {NAME, AARCH64_FL_##IDENT, \ +- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ +- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, +-#include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} ++ {NULL, 0, 0, 0} + }; + + struct processor_name_to_arch +@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates) + candidates->safe_push (opt->name); + } + +-/* Comparer to sort aarch64's feature extensions by population count. Largest +- first. */ +- +-typedef const struct aarch64_option_extension opt_ext; +- +-int opt_ext_cmp (const void* a, const void* b) +-{ +- opt_ext *opt_a = (opt_ext *)a; +- opt_ext *opt_b = (opt_ext *)b; +- +- /* We consider the total set of bits an options turns on to be the union of +- the singleton set containing the option itself and the set of options it +- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and +- FL_SIMD. As such the set of bits represented by this option is +- {FL_DOTPROD, FL_SIMD}. */ +- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; +- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; +- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); +- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); +- int order = popcnt_b - popcnt_a; +- +- /* If they have the same amount of bits set, give it a more +- deterministic ordering by using the value of the bits themselves. 
*/ +- if (order != 0) +- return order; +- +- if (total_flags_a != total_flags_b) +- return total_flags_a < total_flags_b ? 1 : -1; +- +- return 0; +-} +- +-/* Implement TARGET_OPTION_INIT_STRUCT. */ +- +-static void +-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) +-{ +- /* Sort the extensions based on how many bits they set, order the larger +- counts first. We sort the list because this makes processing the +- feature bits O(n) instead of O(n^2). While n is small, the function +- to calculate the feature strings is called on every options push, +- pop and attribute change (arm_neon headers, lto etc all cause this to +- happen quite frequently). It is a trade-off between time and space and +- so time won. */ +- int n_extensions +- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension); +- qsort (&all_extensions_by_on, n_extensions, +- sizeof (struct aarch64_option_extension), opt_ext_cmp); +-} +- +-/* Checks to see if enough bits from the option OPT are enabled in +- ISA_FLAG_BITS to be able to replace the individual options with the +- canonicalized version of the option. This is done based on two rules: +- +- 1) Synthetic groups, such as +crypto we only care about the bits that are +- turned on. e.g. +aes+sha2 can be replaced with +crypto. +- +- 2) Options that themselves have a bit, such as +rdma, in this case, all the +- feature bits they turn on must be available and the bit for the option +- itself must be. In this case it's effectively a reduction rather than a +- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would +- need +rdma+fp+simd which is reduced down to +rdma. +-*/ +- +-static bool +-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) +-{ +- uint64_t flags_check +- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
View file
_service:tar_scm:0119-LoongArch-Remove-vec_concatz-mode-pattern.patch
Added
@@ -0,0 +1,75 @@ +From e19c5ba24839d7446f1874b0b33bd61e27e36905 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 24 Jan 2024 17:19:13 +0800 +Subject: PATCH 119/188 LoongArch: Remove vec_concatz<mode> pattern. + +It is incorrect to use vld/vori to implement the vec_concatz<mode> because when the LSX +instruction is used to update the value of the vector register, the upper 128 bits of +the vector register will not be zeroed. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (@vec_concatz<mode>): Remove this define_insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): Use vec_concat<mode>. +--- + gcc/config/loongarch/lasx.md | 15 --------------- + gcc/config/loongarch/loongarch.cc | 17 ++++++----------- + 2 files changed, 6 insertions(+), 26 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index fdfd65e4a..a5128c30c 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -582,21 +582,6 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_insn "@vec_concatz<mode>" +- (set (match_operand:LASX 0 "register_operand" "=f") +- (vec_concat:LASX +- (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand") +- (match_operand:<VHMODE256_ALL> 2 "const_0_operand"))) +- "ISA_HAS_LASX" +-{ +- if (MEM_P (operands1)) +- return "vld\t%w0,%1"; +- else +- return "vori.b\t%w0,%w1,0"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "<MODE>")) +- + (define_insn "vec_concat<mode>" + (set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cdd4ed15..9bd931549 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9912,17 +9912,12 @@ loongarch_expand_vector_group_init (rtx target, rtx vals) + gcc_unreachable (); + } + +- if (high == CONST0_RTX (half_mode)) +- emit_insn (gen_vec_concatz (vmode, target, low, high)); +- else +- { +- if (!register_operand (low, half_mode)) +- low = force_reg (half_mode, low); +- if (!register_operand (high, half_mode)) +- high = force_reg (half_mode, high); +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, low, high))); +- } ++ if (!register_operand (low, half_mode)) ++ low = force_reg (half_mode, low); ++ if (!register_operand (high, half_mode)) ++ high = force_reg (half_mode, high); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_VEC_CONCAT (vmode, low, high))); + } + + /* Expand initialization of a vector which has all same elements. */ +-- +2.43.0 +
View file
_service:tar_scm:0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
Added
@@ -0,0 +1,43 @@
+From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static
+ data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0
+
+Just a minor patch to avoid having to construct std::strings
+in static data.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
+	(arch_to_arch_name): Use const char * instead of std::string.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 057dc094d..2bdf51b8b 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions[] =
+ 
+ struct processor_name_to_arch
+ {
+-  const std::string processor_name;
++  const char *const processor_name;
+   const enum aarch64_arch arch;
+   const uint64_t flags;
+ };
+@@ -231,7 +231,7 @@ struct processor_name_to_arch
+ struct arch_to_arch_name
+ {
+   const enum aarch64_arch arch;
+-  const std::string arch_name;
++  const char *const arch_name;
+   const uint64_t flags;
+ };
+ 
+-- 
+2.33.0
+
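A minimal C++ sketch (types and names invented here, not from the patch) of why the change matters: std::string members in a static table force constructors to run at program load, while plain C strings are emitted directly as read-only data.

#include <string>

struct entry_str  { std::string name; int arch; };  // dynamic init at startup
struct entry_cstr { const char *name; int arch; };  // pure static .rodata

static const entry_str  heavy[] = { {"armv8-a", 8}, {"armv8.1-a", 8} };
static const entry_cstr light[] = { {"armv8-a", 8}, {"armv8.1-a", 8} };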
View file
_service:tar_scm:0120-LoongArch-Optimize-implementation-of-single-precisio.patch
Added
@@ -0,0 +1,107 @@
+From cb9180ef1fb7e7b97a60adc3d3908b9684771cd8 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Wed, 24 Jan 2024 17:44:17 +0800
+Subject: [PATCH 120/188] LoongArch: Optimize implementation of
+ single-precision floating-point approximate division.
+
+We found that in the spec17 521.wrf program, some loop invariant code generated
+from single-precision floating-point approximate division calculation failed to
+propose a loop. This is because the pseudo-register that stores the
+intermediate temporary calculation results is rewritten in the implementation
+of single-precision floating-point approximate division, failing to propose
+invariants in the loop2_invariant pass. To this end, the intermediate temporary
+calculation results are stored in new pseudo-registers without destroying the
+read-write dependency, so that they could be recognized as loop invariants in
+the loop2_invariant pass.
+After optimization, the number of instructions of 521.wrf is reduced by 0.18%
+compared with before optimization (1716612948501 -> 1713471771364).
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_emit_swdivsf): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/invariant-recip.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc          | 19 +++++++----
+ .../gcc.target/loongarch/invariant-recip.c | 33 +++++++++++++++++++
+ 2 files changed, 46 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9bd931549..5877b0acf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10842,16 +10842,23 @@ void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
+   /* x0 = 1./b estimate.  */
+   emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+					      unspec)));
+-  /* 2.0 - b * x0  */
++  /* e0 = 2.0 - b * x0.  */
+   emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,
+					   gen_rtx_NEG (mode, b), x0, mtwo)));
+ 
+-  /* x0 = a * x0  */
+   if (a != CONST1_RTX (mode))
+-    emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0)));
+-
+-  /* res = e0 * x0  */
+-  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++    {
++      rtx e1 = gen_reg_rtx (mode);
++      /* e1 = a * x0.  */
++      emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0)));
++      /* res = e0 * e1.  */
++      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1)));
++    }
++  else
++    {
++      /* res = e0 * x0.  */
++      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++    }
+ }
+ 
+ static bool
+diff --git a/gcc/testsuite/gcc.target/loongarch/invariant-recip.c b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+new file mode 100644
+index 000000000..2f64f6ed5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -march=loongarch64 -mabi=lp64d -mrecip -mfrecipe -fdump-rtl-loop2_invariant " } */
++/* { dg-final { scan-rtl-dump "Decided to move dependent invariant" "loop2_invariant" } } */
++
++void
++nislfv_rain_plm (int im, int km, float dzl[im][km], float rql[im][km],
++		 float dt)
++{
++  int i, k;
++  float con1, decfl;
++  float dz[km], qn[km], wi[km + 1];
++
++  for (i = 0; i < im; i++)
++    {
++      for (k = 0; k < km; k++)
++	{
++	  dz[k] = dzl[i][k];
++	}
++      con1 = 0.05;
++      for (k = km - 1; k >= 0; k--)
++	{
++	  decfl = (wi[k + 1] - wi[k]) * dt / dz[k];
++	  if (decfl > con1)
++	    {
++	      wi[k] = wi[k + 1] - con1 * dz[k] / dt;
++	    }
++	}
++      for (k = 0; k < km; k++)
++	{
++	  rql[i][k] = qn[k];
++	}
++    }
++}
+-- 
+2.43.0
+
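A C-level sketch of the transformation this commit message describes (variable names follow the comments in the patch, but the code is illustrative, not the compiler's): before the change the reciprocal estimate register was overwritten, so nothing stayed loop-invariant; afterwards each intermediate result gets its own temporary.

float approx_div_before (float a, float b)
{
  float x0 = 1.0f / b;        /* stands in for the frecipe estimate */
  float e0 = 2.0f - b * x0;   /* Newton-Raphson correction term */
  x0 = a * x0;                /* x0 clobbered: breaks invariance */
  return e0 * x0;
}

float approx_div_after (float a, float b)
{
  float x0 = 1.0f / b;        /* invariant whenever b is */
  float e0 = 2.0f - b * x0;   /* also invariant */
  float e1 = a * x0;          /* fresh temporary, no overwrite */
  return e0 * e1;
}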
View file
_service:tar_scm:0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
Added
@@ -0,0 +1,195 @@
+From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of
+ option-related data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2
+
+Some of the option structures have all-const member variables.
+That doesn't seem necessary: we can just use const on the objects
+that are supposed to be read-only.
+
+Also, with the new, more C++-heavy option handling, it seems
+better to use constexpr for the static data, to make sure that
+we're not adding unexpected overhead.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
+	(processor_name_to_arch, arch_to_arch_name): Remove const from
+	member variables.
+	(all_extensions, all_cores, all_architectures): Make a constexpr.
+	* config/aarch64/aarch64.cc (processor): Remove const from
+	member variables.
+	(all_architectures): Make a constexpr.
+	* config/aarch64/driver-aarch64.cc (aarch64_core_data)
+	(aarch64_arch_driver_info): Remove const from member variables.
+	(aarch64_cpu_data, aarch64_arches): Make a constexpr.
+	(get_arch_from_id): Return a pointer to const.
+	(host_detect_local_cpu): Update accordingly.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++-----------
+ gcc/config/aarch64/aarch64.cc               | 14 +++++------
+ gcc/config/aarch64/driver-aarch64.cc        | 15 ++++++------
+ 3 files changed, 27 insertions(+), 28 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 2bdf51b8b..ac3486d71 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts,
+ /* An ISA extension in the co-processor and main instruction set space.  */
+ struct aarch64_option_extension
+ {
+-  const char *const name;
+-  const uint64_t flag_canonical;
+-  const uint64_t flags_on;
+-  const uint64_t flags_off;
++  const char *name;
++  uint64_t flag_canonical;
++  uint64_t flags_on;
++  uint64_t flags_off;
+ };
+ 
+ /* ISA extensions in AArch64.  */
+-static const struct aarch64_option_extension all_extensions[] =
++static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+   {NAME, AARCH64_FL_##IDENT, \
+@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] =
+ 
+ struct processor_name_to_arch
+ {
+-  const char *const processor_name;
+-  const enum aarch64_arch arch;
+-  const uint64_t flags;
++  const char *processor_name;
++  aarch64_arch arch;
++  uint64_t flags;
+ };
+ 
+ struct arch_to_arch_name
+ {
+-  const enum aarch64_arch arch;
+-  const char *const arch_name;
+-  const uint64_t flags;
++  aarch64_arch arch;
++  const char *arch_name;
++  uint64_t flags;
+ };
+ 
+ /* Map processor names to the architecture revision they implement and
+    the default set of architectural feature flags they support.  */
+-static const struct processor_name_to_arch all_cores[] =
++static constexpr processor_name_to_arch all_cores[] =
+ {
+ #define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
+   {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
+@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] =
+ };
+ 
+ /* Map architecture revisions to their string representation.  */
+-static const struct arch_to_arch_name all_architectures[] =
++static constexpr arch_to_arch_name all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \
+   {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1363873b1..71db7ace1 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions[] =
+ /* A processor implementing AArch64.  */
+ struct processor
+ {
+-  const char *const name;
+-  enum aarch64_processor ident;
+-  enum aarch64_processor sched_core;
+-  enum aarch64_arch arch;
+-  const uint64_t flags;
+-  const struct tune_params *const tune;
++  const char *name;
++  aarch64_processor ident;
++  aarch64_processor sched_core;
++  aarch64_arch arch;
++  uint64_t flags;
++  const tune_params *tune;
+ };
+ 
+ /* Architectures implementing AArch64.  */
+-static const struct processor all_architectures[] =
++static constexpr processor all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
+   {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index ddfc9451f..ee9cb65a5 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -50,7 +50,7 @@ struct aarch64_core_data
+   unsigned char implementer_id;   /* Exactly 8 bits */
+   unsigned int part_no;           /* 12 bits + 12 bits */
+   unsigned variant;
+-  const uint64_t flags;
++  uint64_t flags;
+ };
+ 
+ #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
+@@ -64,7 +64,7 @@ struct aarch64_core_data
+ #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+   { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+ 
+-static struct aarch64_core_data aarch64_cpu_data[] =
++static constexpr aarch64_core_data aarch64_cpu_data[] =
+ {
+ #include "aarch64-cores.def"
+   { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
+@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info
+ {
+   const char* id;
+   const char* name;
+-  const uint64_t flags;
++  uint64_t flags;
+ };
+ 
+ /* Skip the leading "V" in the architecture name.  */
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
+   { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
+ 
+-static struct aarch64_arch_driver_info aarch64_arches[] =
++static constexpr aarch64_arch_driver_info aarch64_arches[] =
+ {
+ #include "aarch64-arches.def"
+   {NULL, NULL, 0}
+@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] =
+ /* Return an aarch64_arch_driver_info for the architecture described
+    by ID, or NULL if ID describes something we don't know about.  */
+ 
+-static struct aarch64_arch_driver_info*
++static const aarch64_arch_driver_info *
+ get_arch_from_id (const char* id)
+ {
+   unsigned int i = 0;
+@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv)
+ 
+   if (aarch64_cpu_data[i].name == NULL)
+     {
+-      aarch64_arch_driver_info* arch_info
+-	= get_arch_from_id (DEFAULT_ARCH);
++      auto arch_info = get_arch_from_id (DEFAULT_ARCH);
+ 
+       gcc_assert (arch_info);
+ 
+@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv)
+   else if (arch)
+     {
+       const char *arch_id = aarch64_cpu_data[i].arch;
+-      aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
++      auto arch_info = get_arch_from_id (arch_id);
+ 
+       /* We got some arch indentifier that's not in aarch64-arches.def?  */
+       if (!arch_info)
+-- 
+2.33.0
+
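A small illustrative C++ sketch (not from the patch) of the const/constexpr distinction the commit message relies on: const permits, but does not force, runtime initialization, whereas constexpr requires the table to be a compile-time constant or the build fails, so no hidden startup cost can creep in.

struct processor { const char *name; int arch; };

static const     processor maybe_runtime[] = { {"cortex-a53", 8} };  // may init at load
static constexpr processor compile_time[]  = { {"cortex-a53", 8} };  // must be static data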
View file
_service:tar_scm:0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch
Added
@@ -0,0 +1,71 @@
+From a2baa4807fdfd381c543eb7ea85edf14dc6c8a20 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:32:31 +0800
+Subject: [PATCH 121/188] LoongArch: Define LOGICAL_OP_NON_SHORT_CIRCUIT
+
+Define LOGICAL_OP_NON_SHORT_CIRCUIT as 0, for a short-circuit branch, use the
+short-circuit operation instead of the non-short-circuit operation.
+
+SPEC2017 performance evaluation shows 1% performance improvement for fprate
+GEOMEAN and no obvious regression for others. Especially, 526.blender_r +10.6%
+on 3A6000.
+
+This modification will introduce the following FAIL items:
+
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Conditional combines static and invariant" 1
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Will duplicate bb" 2
+FAIL: gcc.dg/tree-ssa/update-threading.c scan-tree-dump-times optimized "Invalid sum" 0
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Define.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/short-circuit.c: New test.
+---
+ gcc/config/loongarch/loongarch.h          |  1 +
+ .../gcc.target/loongarch/short-circuit.c  | 19 +++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/short-circuit.c
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index f54b078b1..15261fdc0 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -869,6 +869,7 @@ typedef struct {
+    1 is the default; other values are interpreted relative to that.  */
+ 
+ #define BRANCH_COST(speed_p, predictable_p) la_branch_cost
++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+ 
+ /* Return the asm template for a conditional branch instruction.
+    OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+diff --git a/gcc/testsuite/gcc.target/loongarch/short-circuit.c b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+new file mode 100644
+index 000000000..bed585ee1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -fdump-tree-gimple" } */
++
++int
++short_circuit (float *a)
++{
++  float t1x = a[0];
++  float t2x = a[1];
++  float t1y = a[2];
++  float t2y = a[3];
++  float t1z = a[4];
++  float t2z = a[5];
++
++  if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z)
++    return 0;
++
++  return 1;
++}
++/* { dg-final { scan-tree-dump-times "if" 6 "gimple" } } */
+-- 
+2.43.0
+
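Roughly, the hook chooses between the two shapes below (illustrative C++, not GCC internals): defining LOGICAL_OP_NON_SHORT_CIRCUIT to 0 tells the middle end to keep one conditional branch per clause rather than flattening the condition into branchless bit operations.

int f_short_circuit (int a, int b)
{
  if (a > 0)      /* first test; branches */
    if (b > 0)    /* second test runs only when the first passed */
      return 1;
  return 0;
}

int f_non_short_circuit (int a, int b)
{
  return (a > 0) & (b > 0);   /* both comparisons always evaluated */
}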
View file
_service:tar_scm:0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
Added
@@ -0,0 +1,394 @@ +From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:56 +0100 +Subject: PATCH 023/157 BackportSME aarch64: Make more use of + aarch64_feature_flags + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd + +A previous patch added a aarch64_feature_flags typedef, to abstract +the representation of the feature flags. This patch makes existing +code use the typedef too. Hope I've caught them all! + +gcc/ + * common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags + for feature flags throughout. + * config/aarch64/aarch64-protos.h: Likewise. + * config/aarch64/aarch64-sve-builtins.h: Likewise. + * config/aarch64/aarch64-sve-builtins.cc: Likewise. + * config/aarch64/aarch64.cc: Likewise. + * config/aarch64/aarch64.opt: Likewise. + * config/aarch64/driver-aarch64.cc: Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++------- + gcc/config/aarch64/aarch64-protos.h | 5 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++--------- + gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++--- + gcc/config/aarch64/aarch64.cc | 29 +++++++++++---------- + gcc/config/aarch64/aarch64.opt | 2 +- + gcc/config/aarch64/driver-aarch64.cc | 10 +++---- + 7 files changed, 56 insertions(+), 47 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index ac3486d71..3efa57b26 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts, + struct aarch64_option_extension + { + const char *name; +- uint64_t flag_canonical; +- uint64_t flags_on; +- uint64_t flags_off; ++ aarch64_feature_flags flag_canonical; ++ aarch64_feature_flags flags_on; ++ aarch64_feature_flags flags_off; + }; + + /* ISA extensions in AArch64. */ +@@ -225,14 +225,14 @@ struct processor_name_to_arch + { + const char *processor_name; + aarch64_arch arch; +- uint64_t flags; ++ aarch64_feature_flags flags; + }; + + struct arch_to_arch_name + { + aarch64_arch arch; + const char *arch_name; +- uint64_t flags; ++ aarch64_feature_flags flags; + }; + + /* Map processor names to the architecture revision they implement and +@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures = + a copy of the string is created and stored to INVALID_EXTENSION. */ + + enum aarch64_parse_opt_result +-aarch64_parse_extension (const char *str, uint64_t *isa_flags, ++aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags, + std::string *invalid_extension) + { + /* The extension string is parsed left to right. */ +@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates) + that all the "+" flags come before the "+no" flags. 
*/ + + std::string +-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, +- uint64_t default_arch_flags) ++aarch64_get_extension_string_for_isa_flags ++ (aarch64_feature_flags isa_flags, ++ aarch64_feature_flags default_arch_flags) + { + std::string outstr = ""; + +@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name) + || a_to_an->arch == aarch64_no_arch) + fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); + +- uint64_t extensions = p_to_a->flags; ++ aarch64_feature_flags extensions = p_to_a->flags; + aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); + + std::string outstr = a_to_an->arch_name +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index e60ce3c36..ef84df731 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); + enum aarch64_parse_opt_result aarch64_parse_extension (const char *, +- uint64_t *, ++ aarch64_feature_flags *, + std::string *); + void aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates); +-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); ++std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags, ++ aarch64_feature_flags); + + rtl_opt_pass *make_pass_fma_steering (gcc::context *); + rtl_opt_pass *make_pass_track_speculation (gcc::context *); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index c06e99339..b927a886e 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -82,7 +82,7 @@ public: + + /* The architecture extensions that the function requires, as a set of + AARCH64_FL_* flags. */ +- uint64_t required_extensions; ++ aarch64_feature_flags required_extensions; + + /* True if the decl represents an overloaded function that needs to be + resolved by function_resolver. */ +@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl) + Report an error against LOCATION if not. 
*/ + static bool + check_required_extensions (location_t location, tree fndecl, +- uint64_t required_extensions) ++ aarch64_feature_flags required_extensions) + { +- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; ++ auto missing_extensions = required_extensions & ~aarch64_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + +- static const struct { uint64_t flag; const char *name; } extensions = { ++ static const struct { ++ aarch64_feature_flags flag; ++ const char *name; ++ } extensions = { + #define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \ + { AARCH64_FL_##IDENT, EXT_NAME }, + #include "aarch64-option-extensions.def" +@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance) + registered_function & + function_builder::add_function (const function_instance &instance, + const char *name, tree fntype, tree attrs, +- uint64_t required_extensions, ++ aarch64_feature_flags required_extensions, + bool overloaded_p, + bool placeholder_p) + { +@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance, + one-to-one mapping between "short" and "full" names, and if standard + overload resolution therefore isn't necessary. */ + void +-function_builder::add_unique_function (const function_instance &instance, +- tree return_type, +- vec<tree> &argument_types, +- uint64_t required_extensions, +- bool force_direct_overloads) ++function_builder:: ++add_unique_function (const function_instance &instance, ++ tree return_type, ++ vec<tree> &argument_types, ++ aarch64_feature_flags required_extensions, ++ bool force_direct_overloads) + { + /* Add the function under its full (unique) name. */ + char *name = get_name (instance, false); +@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance, + features are available as part of resolving the function to the + relevant unique function. */ + void +-function_builder::add_overloaded_function (const function_instance &instance, +- uint64_t required_extensions) ++function_builder:: ++add_overloaded_function (const function_instance &instance, ++ aarch64_feature_flags required_extensions) + { + char *name = get_name (instance, true); + if (registered_function **map_value = m_overload_names.get (name)) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 24594d584..63d1db776 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -263,7 +263,7 @@ struct function_group_info + + /* The architecture extensions that the functions require, as a set of + AARCH64_FL_* flags. */ +- uint64_t required_extensions; ++ aarch64_feature_flags required_extensions; + }; + + /* Describes a single fully-resolved function (i.e. one that has a +@@ -321,8 +321,9 @@ public:
View file
_service:tar_scm:0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch
Added
@@ -0,0 +1,84 @@
+From 5cab5d1a9fb9cfaa0d12d229aa0ee19e0dd55cc5 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:23:20 +0800
+Subject: [PATCH 122/188] LoongArch: Split vec_selects of bottom elements into
+ simple move
+
+For below pattern, can be treated as a simple move because floating point
+and vector share a common register on loongarch64.
+
+(set (reg/v:SF 32 $f0 [orig:93 res ] [93])
+     (vec_select:SF (reg:V8SF 32 $f0 [115])
+       (parallel [
+           (const_int 0 [0])
+         ])))
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_extract<mode>_0):
+	New define_insn_and_split patten.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-extract.c: New test.
+---
+ gcc/config/loongarch/lasx.md            | 15 ++++++++++++++
+ .../gcc.target/loongarch/vect-extract.c | 20 +++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-extract.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index a5128c30c..946811e1a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -746,6 +746,21 @@
+   DONE;
+ })
+ 
++(define_insn_and_split "vec_extract<mode>_0"
++  [(set (match_operand:<UNITMODE> 0 "register_operand" "=f")
++	(vec_select:<UNITMODE>
++	  (match_operand:FLASX 1 "register_operand" "f")
++	  (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX"
++  "#"
++  "&& reload_completed"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[1] = gen_rtx_REG (<UNITMODE>mode, REGNO (operands[1]));
++}
++  [(set_attr "move_type" "fmove")
++   (set_attr "mode" "<UNITMODE>")])
++
+ (define_expand "vec_perm<mode>"
+  [(match_operand:LASX 0 "register_operand")
+   (match_operand:LASX 1 "register_operand")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-extract.c b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+new file mode 100644
+index 000000000..ce126e3a4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mlasx -fno-vect-cost-model -fno-unroll-loops" } */
++/* { dg-final { scan-assembler-not "xvpickve.w" } } */
++/* { dg-final { scan-assembler-not "xvpickve.d" } } */
++
++float
++sum_float (float *a, int n) {
++  float res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
++
++double
++sum_double (double *a, int n) {
++  double res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
+-- 
+2.43.0
+
View file
_service:tar_scm:0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
Added
@@ -0,0 +1,70 @@
+From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:56 +0100
+Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of
+ flags_on/off fields
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971
+
+After previous changes, it's more convenient if the flags_on and
+flags_off fields of all_extensions include the feature flag itself.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (all_extensions):
+	Include the feature flag in flags_on and flags_off.
+	(aarch64_parse_extension): Update accordingly.
+	(aarch64_get_extension_string_for_isa_flags): Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 3efa57b26..752ba5632 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -213,10 +213,8 @@ struct aarch64_option_extension
+ static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+-  {NAME, AARCH64_FL_##IDENT, \
+-   feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+-   feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+-   & ~AARCH64_FL_##IDENT},
++  {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
++   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
+ #include "config/aarch64/aarch64-option-extensions.def"
+   {NULL, 0, 0, 0}
+ };
+@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
+ 	    {
+ 	      /* Add or remove the extension.  */
+ 	      if (adding_ext)
+-		*isa_flags |= (opt->flags_on | opt->flag_canonical);
++		*isa_flags |= opt->flags_on;
+ 	      else
+-		*isa_flags &= ~(opt->flags_off | opt->flag_canonical);
++		*isa_flags &= ~opt->flags_off;
+ 	      break;
+ 	    }
+ 	}
+@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags
+ 
+       if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
+ 	{
+-	  current_flags |= opt.flag_canonical | opt.flags_on;
++	  current_flags |= opt.flags_on;
+ 	  added |= opt.flag_canonical;
+ 	}
+     }
+@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags
+   for (auto &opt : all_extensions)
+     if (opt.flag_canonical & current_flags & ~isa_flags)
+       {
+-	current_flags &= ~(opt.flag_canonical | opt.flags_off);
++	current_flags &= ~opt.flags_off;
+ 	outstr += "+no";
+ 	outstr += opt.name;
+       }
+-- 
+2.33.0
+
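A minimal sketch of the simplification (illustrative flag values, not the real AArch64 feature bits): once flags_on and flags_off already carry the extension's own bit, enabling or disabling an extension becomes a single mask operation instead of two.

#include <cstdint>

constexpr uint64_t FL_FP   = uint64_t (1) << 0;
constexpr uint64_t FL_SIMD = uint64_t (1) << 1;   // SIMD depends on FP

struct opt_ext { uint64_t flag_canonical, flags_on, flags_off; };

// flags_on now includes FL_SIMD itself, flags_off includes it too.
constexpr opt_ext simd = { FL_SIMD, FL_SIMD | FL_FP, FL_SIMD };

uint64_t enable_ext (uint64_t isa)  { return isa |  simd.flags_on;  }  // one OR
uint64_t disable_ext (uint64_t isa) { return isa & ~simd.flags_off; }  // one AND-NOT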
View file
_service:tar_scm:0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch
Added
@@ -0,0 +1,112 @@ +From c4815d70715bed71b8e89888ef19eb43e9171229 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 30 Jan 2024 15:02:32 +0800 +Subject: PATCH 123/188 LoongArch: Modify the address calculation logic for + obtaining array element values through fp. + +Modify address calculation logic from (((a x C) + fp) + offset) to ((fp + offset) + a x C). +Thereby modifying the register dependencies and optimizing the code. +The value of C is 2 4 or 8. + +The following is the assembly code before and after a loop modification in spec2006 401.bzip: + + old | new + 735 .L71: | 735 .L71: + 736 slli.d $r12,$r15,2 | 736 slli.d $r12,$r15,2 + 737 ldx.w $r13,$r22,$r12 | 737 ldx.w $r13,$r22,$r12 + 738 addi.d $r15,$r15,-1 | 738 addi.d $r15,$r15,-1 + 739 slli.w $r16,$r15,0 | 739 slli.w $r16,$r15,0 + 740 addi.w $r13,$r13,-1 | 740 addi.w $r13,$r13,-1 + 741 slti $r14,$r13,0 | 741 slti $r14,$r13,0 + 742 add.w $r12,$r26,$r13 | 742 add.w $r12,$r26,$r13 + 743 maskeqz $r12,$r12,$r14 | 743 maskeqz $r12,$r12,$r14 + 744 masknez $r14,$r13,$r14 | 744 masknez $r14,$r13,$r14 + 745 or $r12,$r12,$r14 | 745 or $r12,$r12,$r14 + 746 ldx.bu $r14,$r30,$r12 | 746 ldx.bu $r14,$r30,$r12 + 747 lu12i.w $r13,4096>>12 | 747 alsl.d $r14,$r14,$r18,2 + 748 ori $r13,$r13,432 | 748 ldptr.w $r13,$r14,0 + 749 add.d $r13,$r13,$r3 | 749 addi.w $r17,$r13,-1 + 750 alsl.d $r14,$r14,$r13,2 | 750 stptr.w $r17,$r14,0 + 751 ldptr.w $r13,$r14,-1968 | 751 slli.d $r13,$r13,2 + 752 addi.w $r17,$r13,-1 | 752 stx.w $r12,$r22,$r13 + 753 st.w $r17,$r14,-1968 | 753 ldptr.w $r12,$r19,0 + 754 slli.d $r13,$r13,2 | 754 blt $r12,$r16,.L71 + 755 stx.w $r12,$r22,$r13 | 755 .align 4 + 756 ldptr.w $r12,$r18,-2048 | 756 + 757 blt $r12,$r16,.L71 | 757 + 758 .align 4 | 758 + +This patch is ported from riscv's commit r14-3111. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function. + (loongarch_legitimize_address): Add logical transformation code. +--- + gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5877b0acf..612a9c138 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3215,6 +3215,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + return true; + } + ++/* Helper loongarch_legitimize_address. Given X, return true if it ++ is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. ++ ++ This respectively represent canonical shift-add rtxs or scaled ++ memory addresses. */ ++static bool ++mem_shadd_or_shadd_rtx_p (rtx x) ++{ ++ return ((GET_CODE (x) == ASHIFT ++ || GET_CODE (x) == MULT) ++ && CONST_INT_P (XEXP (x, 1)) ++ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) ++ || (GET_CODE (x) == MULT ++ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); ++} ++ + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can + be legitimized in a way that the generic machinery might not expect, + return a new address, otherwise return NULL. MODE is the mode of +@@ -3238,6 +3254,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + loongarch_split_plus (x, &base, &offset); + if (offset != 0) + { ++ /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. 
*/ ++ if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) ++ && IMM12_OPERAND (offset)) ++ { ++ rtx index = XEXP (base, 0); ++ rtx fp = XEXP (base, 1); ++ ++ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) ++ { ++ /* If we were given a MULT, we must fix the constant ++ as we're going to create the ASHIFT form. */ ++ int shift_val = INTVAL (XEXP (index, 1)); ++ if (GET_CODE (index) == MULT) ++ shift_val = exact_log2 (shift_val); ++ ++ rtx reg1 = gen_reg_rtx (Pmode); ++ rtx reg3 = gen_reg_rtx (Pmode); ++ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); ++ loongarch_emit_binary (PLUS, reg3, ++ gen_rtx_ASHIFT (Pmode, XEXP (index, 0), ++ GEN_INT (shift_val)), ++ reg1); ++ ++ return reg3; ++ } ++ } ++ + if (!loongarch_valid_base_register_p (base, mode, false)) + base = copy_to_mode_reg (Pmode, base); + addr = loongarch_add_offset (NULL, base, offset); +-- +2.43.0 +
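The same reassociation at C level (illustrative sketch, assuming a 4-byte element so the scale C is 4): the rewritten form lets "fp + offset" be computed once and hoisted, instead of re-adding the offset to a varying base on every iteration.

char *addr_before (char *fp, long offset, long a)
{
  return (fp + (a << 2)) + offset;   /* offset re-added to a varying base */
}

char *addr_after (char *fp, long offset, long a)
{
  char *base = fp + offset;   /* loop-invariant: independent of a */
  return base + (a << 2);     /* only the scaled index varies */
}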
View file
_service:tar_scm:0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
Added
@@ -0,0 +1,370 @@ +From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:57 +0100 +Subject: PATCH 025/157 BackportSME aarch64: Tweak handling of + -mgeneral-regs-only + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0 + +-mgeneral-regs-only is effectively "+nofp for the compiler without +changing the assembler's ISA flags". Currently that's implemented +by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on +!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP +registers depend (directly or indirectly) on one of those three TARGET +macros. The problem is that it's easy to forgot to do the last bit. + +This patch instead represents the distinction between "assemnbler +ISA flags" and "compiler ISA flags" more directly, funnelling +all updates through a new function that sets both sets of flags +together. + +gcc/ + * config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable. + * config/aarch64/aarch64.h (aarch64_asm_isa_flags) + (aarch64_isa_flags): Redefine as read-only macros. + (TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on + !TARGET_GENERAL_REGS_ONLY. + * common/config/aarch64/aarch64-common.cc + (aarch64_set_asm_isa_flags): New function. + (aarch64_handle_option): Call it when updating -mgeneral-regs. + * config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace + m_old_isa_flags with m_old_asm_isa_flags. + (aarch64_set_asm_isa_flags): Declare. + * config/aarch64/aarch64-builtins.cc + (aarch64_simd_switcher::aarch64_simd_switcher) + (aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore + aarch64_asm_isa_flags instead of aarch64_isa_flags. + * config/aarch64/aarch64-sve-builtins.cc + (check_required_extensions): Use aarch64_asm_isa_flags instead + of aarch64_isa_flags. + * config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function. + (aarch64_override_options, aarch64_handle_attr_arch) + (aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use + aarch64_set_asm_isa_flags to set the ISA flags. + (aarch64_option_print, aarch64_declare_function_name) + (aarch64_start_file): Use aarch64_asm_isa_flags instead + of aarch64_isa_flags. + (aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as + aarch64_isa_flags. +--- + gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++ + gcc/config/aarch64/aarch64-builtins.cc | 6 +-- + gcc/config/aarch64/aarch64-protos.h | 5 ++- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++------- + gcc/config/aarch64/aarch64.h | 17 ++++++-- + gcc/config/aarch64/aarch64.opt | 3 ++ + 7 files changed, 68 insertions(+), 22 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 752ba5632..c64b4987e 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -137,6 +137,17 @@ reset_tsv110_option () + } + } + ++/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update ++ OPTS->x_aarch64_isa_flags accordingly. */ ++void ++aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags) ++{ ++ opts->x_aarch64_asm_isa_flags = flags; ++ opts->x_aarch64_isa_flags = flags; ++ if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY) ++ opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP); ++} ++ + /* Implement TARGET_HANDLE_OPTION. 
+ This function handles the target specific options for CPU/target selection. + +@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts, + + case OPT_mgeneral_regs_only: + opts->x_target_flags |= MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags); + return true; + + case OPT_mfix_cortex_a53_835769: +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 42276e7ca..015e9d975 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t) + /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD + set. */ + aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags) +- : m_old_isa_flags (aarch64_isa_flags), ++ : m_old_asm_isa_flags (aarch64_asm_isa_flags), + m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY) + { + /* Changing the ISA flags should be enough here. We shouldn't need to + pay the compile-time cost of a full target switch. */ +- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags; + global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags); + } + + aarch64_simd_switcher::~aarch64_simd_switcher () + { + if (m_old_general_regs_only) + global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; +- aarch64_isa_flags = m_old_isa_flags; ++ aarch64_set_asm_isa_flags (m_old_asm_isa_flags); + } + + /* Implement #pragma GCC aarch64 "arm_neon.h". */ +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index ef84df731..86e444a60 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -747,7 +747,7 @@ public: + ~aarch64_simd_switcher (); + + private: +- unsigned long m_old_isa_flags; ++ unsigned long m_old_asm_isa_flags; + bool m_old_general_regs_only; + }; + +@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx, + machine_mode, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + ++void aarch64_set_asm_isa_flags (aarch64_feature_flags); ++ + /* Defined in common/config/aarch64-common.cc. */ ++void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags); + bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index b927a886e..a70e3a6b4 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -696,7 +696,7 @@ static bool + check_required_extensions (location_t location, tree fndecl, + aarch64_feature_flags required_extensions) + { +- auto missing_extensions = required_extensions & ~aarch64_isa_flags; ++ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8cb820767..3e83e48ec 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) + return (int) value / 64; + } + ++/* Set the global aarch64_asm_isa_flags to FLAGS and update ++ aarch64_isa_flags accordingly. 
*/ ++ ++void ++aarch64_set_asm_isa_flags (aarch64_feature_flags flags) ++{ ++ aarch64_set_asm_isa_flags (&global_options, flags); ++} ++ + /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning + and is used to parse the -m{cpu,tune,arch} strings and setup the initial + tuning structs. In particular it must set selected_tune and +- aarch64_isa_flags that define the available ISA features and tuning ++ aarch64_asm_isa_flags that define the available ISA features and tuning + decisions. It must also set selected_arch as this will be used to + output the .arch asm tags for each function. */ + +@@ -18444,7 +18453,7 @@ aarch64_override_options (void) + { + aarch64_feature_flags cpu_isa = 0; + aarch64_feature_flags arch_isa = 0; +- aarch64_isa_flags = 0; ++ aarch64_set_asm_isa_flags (0); + + const struct processor *cpu = NULL; + const struct processor *arch = NULL; +@@ -18484,25 +18493,25 @@ aarch64_override_options (void) + } + + selected_arch = arch->arch; +- aarch64_isa_flags = arch_isa; ++ aarch64_set_asm_isa_flags (arch_isa); + } + else if (cpu) + { + selected_arch = cpu->arch; +- aarch64_isa_flags = cpu_isa; ++ aarch64_set_asm_isa_flags (cpu_isa);
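A minimal sketch of the "funnel all updates through one setter" idea this commit message describes (simplified types and a placeholder flag mask, not the real option machinery): because both flag sets are written in one place, the assembler's view and the compiler's view can never drift apart.

#include <cstdint>

constexpr uint64_t FL_FP_FEATURES = 0xffull << 8;   // placeholder FP-register bits

struct gcc_opts { uint64_t asm_isa_flags, isa_flags; bool general_regs_only; };

void set_asm_isa_flags (gcc_opts &o, uint64_t flags)
{
  o.asm_isa_flags = flags;            // full ISA, as told to the assembler
  o.isa_flags = flags;                // what the compiler itself may use
  if (o.general_regs_only)
    o.isa_flags &= ~FL_FP_FEATURES;   // strip everything needing FP registers
}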
View file
_service:tar_scm:0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch
Added
@@ -0,0 +1,214 @@ +From 3f45bbfe924ffe38832b2ad0050589b9f188422e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 14:44:39 +0800 +Subject: PATCH 124/188 LoongArch: Merge template got_load_tls_{ld/gd/le/ie}. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_load_tls): + Load all types of tls symbols through one function. + (loongarch_got_load_tls_gd): Delete. + (loongarch_got_load_tls_ld): Delete. + (loongarch_got_load_tls_ie): Delete. + (loongarch_got_load_tls_le): Delete. + (loongarch_call_tls_get_addr): Modify the called function name. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@got_load_tls_gd<mode>): Delete. + (@load_tls<mode>): New template. + (@got_load_tls_ld<mode>): Delete. + (@got_load_tls_le<mode>): Delete. + (@got_load_tls_ie<mode>): Delete. +--- + gcc/config/loongarch/loongarch.cc | 47 +++++------------------- + gcc/config/loongarch/loongarch.md | 59 ++++++++++++------------------- + 2 files changed, 30 insertions(+), 76 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 612a9c138..ced7e58c2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2732,36 +2732,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + /* The __tls_get_attr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + +-/* Load an entry from the GOT for a TLS GD access. */ ++/* Load an entry for a TLS access. */ + + static rtx +-loongarch_got_load_tls_gd (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym) + { +- return gen_got_load_tls_gd (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS LD access. */ +- +-static rtx +-loongarch_got_load_tls_ld (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ld (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS IE access. */ +- +-static rtx +-loongarch_got_load_tls_ie (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ie (Pmode, dest, sym); +-} +- +-/* Add in the thread pointer for a TLS LE access. */ +- +-static rtx +-loongarch_got_load_tls_le (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_le (Pmode, dest, sym); ++ return gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2805,14 +2781,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- { +- if (type == SYMBOL_TLSLDM) +- emit_insn (loongarch_got_load_tls_ld (a0, loc)); +- else if (type == SYMBOL_TLSGD) +- emit_insn (loongarch_got_load_tls_gd (a0, loc)); +- else +- gcc_unreachable (); +- } ++ emit_insn (loongarch_load_tls (a0, loc)); + + if (flag_plt) + { +@@ -2949,10 +2918,10 @@ loongarch_legitimize_tls_address (rtx loc) + /* la.tls.ie; tp-relative add. 
*/ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -2975,7 +2944,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3007,11 +2976,11 @@ loongarch_legitimize_tls_address (rtx loc) + + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -3039,7 +3008,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + } + else +- emit_insn (loongarch_got_load_tls_le (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23d8dc126..4f9a92334 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -51,10 +51,7 @@ + UNSPEC_BITREV_8B + + ;; TLS +- UNSPEC_TLS_GD +- UNSPEC_TLS_LD +- UNSPEC_TLS_LE +- UNSPEC_TLS_IE ++ UNSPEC_TLS + + ;; Stack tie + UNSPEC_TIE +@@ -2701,45 +2698,33 @@ + + ;; Thread-Local Storage + +-(define_insn "@got_load_tls_gd<mode>" ++(define_insn "@load_tls<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P + (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_GD)) ++ UNSPEC_TLS)) + "" +- "la.tls.gd\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "@got_load_tls_ld<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LD)) +- "" +- "la.tls.ld\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++{ ++ enum loongarch_symbol_type symbol_type; ++ gcc_assert (loongarch_symbolic_constant_p (operands1, &symbol_type)); + +-(define_insn "@got_load_tls_le<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LE)) +- "" +- "la.tls.le\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++ switch (symbol_type) ++ { ++ case SYMBOL_TLS_LE: ++ return "la.tls.le\t%0,%1"; ++ case SYMBOL_TLS_IE: ++ return "la.tls.ie\t%0,%1"; ++ case SYMBOL_TLSLDM: ++ return "la.tls.ld\t%0,%1"; ++ case SYMBOL_TLSGD: ++ return "la.tls.gd\t%0,%1"; + +-(define_insn "@got_load_tls_ie<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_IE)) +- ""
View file
_service:tar_scm:0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
Added
@@ -0,0 +1,453 @@ +From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:57 +0100 +Subject: PATCH 026/157 BackportSME aarch64: Remove redundant TARGET_* + checks + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3 + +After previous patches, it's possible to remove TARGET_* +options that are redundant due to (IMO) obvious dependencies. + +gcc/ + * config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4) + (TARGET_DOTPROD): Don't depend on TARGET_SIMD. + (TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test. + (TARGET_FP_F16INST): Don't depend on TARGET_FLOAT. + (TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend + on TARGET_SVE. + (TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3) + (TARGET_SVE2_SM4): Don't depend on TARGET_SVE2. + (TARGET_F32MM, TARGET_F64MM): Delete. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard + float macros with just TARGET_FLOAT rather than TARGET_FLOAT + || TARGET_SIMD. + * config/aarch64/aarch64-simd.md (copysign<mode>3): Depend + only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD. + (aarch64_crypto_aes<aes_op>v16qi): Depend only on TARGET_AES, + rather than TARGET_SIMD && TARGET_AES. + (aarch64_crypto_aes<aesmc_op>v16qi): Likewise. + (*aarch64_crypto_aese_fused): Likewise. + (*aarch64_crypto_aesd_fused): Likewise. + (aarch64_crypto_pmulldi): Likewise. + (aarch64_crypto_pmullv2di): Likewise. + (aarch64_crypto_sha1hsi): Likewise TARGET_SHA2. + (aarch64_crypto_sha1hv4si): Likewise. + (aarch64_be_crypto_sha1hv4si): Likewise. + (aarch64_crypto_sha1su1v4si): Likewise. + (aarch64_crypto_sha1<sha1_op>v4si): Likewise. + (aarch64_crypto_sha1su0v4si): Likewise. + (aarch64_crypto_sha256h<sha256_op>v4si): Likewise. + (aarch64_crypto_sha256su0v4si): Likewise. + (aarch64_crypto_sha256su1v4si): Likewise. + (aarch64_crypto_sha512h<sha512_op>qv2di): Likewise TARGET_SHA3. + (aarch64_crypto_sha512su0qv2di): Likewise. + (aarch64_crypto_sha512su1qv2di, eor3q<mode>4): Likewise. + (aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq<mode>4): Likewise. + (aarch64_sm3ss1qv4si): Likewise TARGET_SM4. + (aarch64_sm3tt<sm3tt_op>qv4si): Likewise. + (aarch64_sm3partw<sm3part_op>qv4si): Likewise. + (aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise. + * config/aarch64/aarch64.md (<FLOATUORS:optab>dihf2) + (copysign<GPF:mode>3, copysign<GPF:mode>3_insn) + (xorsign<mode>3): Remove redundant TARGET_FLOAT condition. 
+--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++--------------- + gcc/config/aarch64/aarch64.h | 30 ++++++++-------- + gcc/config/aarch64/aarch64.md | 8 ++--- + 4 files changed, 47 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 18c9b975b..2dfe2b8f8 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + + aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile); + +- if (TARGET_FLOAT || TARGET_SIMD) ++ if (TARGET_FLOAT) + { + builtin_define_with_int_value ("__ARM_FP", 0x0E); + builtin_define ("__ARM_FP16_FORMAT_IEEE"); +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index de92802f5..a47b39281 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -693,7 +693,7 @@ + (match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand") +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + { + rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; +@@ -8352,7 +8352,7 @@ + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w")) + CRYPTO_AES)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aes_op>\\t%0.16b, %2.16b" + (set_attr "type" "crypto_aese") + ) +@@ -8361,7 +8361,7 @@ + (set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI (match_operand:V16QI 1 "register_operand" "w") + CRYPTO_AESMC)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aesmc_op>\\t%0.16b, %1.16b" + (set_attr "type" "crypto_aesmc") + ) +@@ -8380,7 +8380,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESE) + UNSPEC_AESMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8401,7 +8401,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESD) + UNSPEC_AESIMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8415,7 +8415,7 @@ + (unspec:SI (match_operand:SI 1 + "register_operand" "w") + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8425,7 +8425,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 0))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && !BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8435,7 +8435,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 3))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8445,7 +8445,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA1SU1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su1\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8456,7 +8456,7 @@ + (match_operand:SI 2 "register_operand" "w") + 
(match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1<sha1_op>\\t%q0, %s2, %3.4s" + (set_attr "type" "crypto_sha1_slow") + ) +@@ -8467,7 +8467,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA1SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sha1_xor") + ) +@@ -8480,7 +8480,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA256)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256h<sha256_op>\\t%q0, %q2, %3.4s" + (set_attr "type" "crypto_sha256_slow") + ) +@@ -8490,7 +8490,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA256SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256su0\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha256_fast") + ) +@@ -8501,7 +8501,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA256SU1)) +- "TARGET_SIMD && TARGET_SHA2"
View file
_service:tar_scm:0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch
Added
@@ -0,0 +1,453 @@ +From cd177538c2a0f5248e9e7af6247b4d1ba6fe55db Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 19:10:46 +0800 +Subject: PATCH 125/188 LoongArch: Add the macro implementation of + mcmodel=extreme. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p): + Add function declaration. + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend" + is not allowed + (loongarch_load_tls): Added macro support in extreme mode. + (loongarch_call_tls_get_addr): Likewise. + (loongarch_legitimize_tls_address): Likewise. + (loongarch_force_address): Likewise. + (loongarch_legitimize_move): Likewise. + (loongarch_output_mi_thunk): Likewise. + (loongarch_option_override_internal): Remove the code that detects + explicit relocs status. + (loongarch_handle_model_attribute): Likewise. + * config/loongarch/loongarch.md (movdi_symbolic_off64): New template. + * config/loongarch/predicates.md (symbolic_off64_operand): New predicate. + (symbolic_off64_or_reg_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-5.c: New test. + * gcc.target/loongarch/func-call-extreme-5.c: New test. + * gcc.target/loongarch/func-call-extreme-6.c: New test. + * gcc.target/loongarch/tls-extreme-macro.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 110 +++++++++++------- + gcc/config/loongarch/loongarch.md | 48 +++++++- + gcc/config/loongarch/predicates.md | 12 ++ + .../gcc.target/loongarch/attr-model-5.c | 8 ++ + .../loongarch/func-call-extreme-5.c | 7 ++ + .../loongarch/func-call-extreme-6.c | 7 ++ + .../gcc.target/loongarch/tls-extreme-macro.c | 35 ++++++ + 8 files changed, 184 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 5060efbb6..87b94e8b0 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); + extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); + extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); ++extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ced7e58c2..9cfe5bfb2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1932,8 +1932,13 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_PCREL: + case SYMBOL_PCREL64: ++ /* When the code model is extreme, the non-zero offset situation ++ has not been handled well, so it is disabled here now. */ ++ if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64)) ++ return false; ++ /* fall through */ ++ case SYMBOL_PCREL: + /* GAS rejects offsets outside the range -2^31, 2^31-1. 
*/ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + +@@ -2735,9 +2740,15 @@ static GTY (()) rtx loongarch_tls_symbol; + /* Load an entry for a TLS access. */ + + static rtx +-loongarch_load_tls (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type) + { +- return gen_load_tls (Pmode, dest, sym); ++ /* TLS LE gets a 32 or 64 bit offset here, so one register can do it. */ ++ if (type == SYMBOL_TLS_LE) ++ return gen_load_tls (Pmode, dest, sym); ++ ++ return loongarch_symbol_extreme_p (type) ++ ? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode)) ++ : gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2769,8 +2780,6 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp1 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +@@ -2781,7 +2790,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- emit_insn (loongarch_load_tls (a0, loc)); ++ emit_insn (loongarch_load_tls (a0, loc, type)); + + if (flag_plt) + { +@@ -2848,22 +2857,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_EXTREME: + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); +- +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), +- loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) ++ { ++ rtx tmp1 = gen_reg_rtx (Pmode); ++ rtx high = gen_reg_rtx (Pmode); ++ ++ loongarch_emit_move (high, ++ gen_rtx_HIGH (Pmode, ++ loongarch_tls_symbol)); ++ loongarch_emit_move (tmp1, ++ gen_rtx_LO_SUM (Pmode, ++ gen_rtx_REG (Pmode, 0), ++ loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); ++ loongarch_emit_move (dest, ++ gen_rtx_MEM (Pmode, ++ gen_rtx_PLUS (Pmode, ++ high, tmp1))); ++ } ++ else ++ emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, ++ gen_reg_rtx (DImode))); + } + break; + +@@ -2928,8 +2943,6 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp3 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp3, + gen_rtx_REG (Pmode, 0), tmp2)); +@@ -2944,7 +2957,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3001,14 +3014,12 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); + emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); + } + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn 
(loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_LE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3081,7 +3092,7 @@ loongarch_force_address (rtx x, machine_mode mode) + return x; + } + +-static bool ++bool + loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + {
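The patch above (shown truncated) lets the extreme code model work without explicit relocations by routing symbol loads through the new `movdi_symbolic_off64` pattern. A minimal user-level sketch of what this enables; the example is ours, not from the patch, and the option spelling follows the LoongArch `-mexplicit-relocs=` values referenced in the ChangeLog:

```c
/* Build sketch: gcc -O2 -mcmodel=extreme -mexplicit-relocs=none -S t.c
   With this patch the compiler may fall back to assembler macros
   (la.local / la.tls.*) instead of requiring the explicit
   pcalau12i/lu32i.d/lu52i.d relocation sequences.  */
extern int global_counter;
__thread int tls_counter;

int
sum_counters (void)
{
  return global_counter + tls_counter;
}
```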
_service:tar_scm:0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
Added
@@ -0,0 +1,132 @@ +From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 4 Oct 2022 16:39:18 +0100 +Subject: PATCH 027/157 BackportSME aarch64: Define __ARM_FEATURE_RCPC + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630 + +https://github.com/ARM-software/acle/pull/199 adds a new feature +macro for RCPC, for use in things like inline assembly. This patch +adds the associated support to GCC. + +Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a +entry didn't include it. This was probably harmless in practice +since GCC simply ignored the extension until now. (The GAS +definition is OK.) + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro. + * config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC. + * config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1) + (neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define + __ARM_FEATURE_RCPC when appropriate. + +gcc/testsuite/ + * gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests. +--- + gcc/config/aarch64/aarch64-arches.def | 2 +- + gcc/config/aarch64/aarch64-c.cc | 1 + + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 1 + + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++ + 5 files changed, 28 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 9f8246618..5a9eff336 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -33,7 +33,7 @@ + AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) + AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) + AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC)) + AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) + AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) + AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 2dfe2b8f8..4085ad840 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); ++ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 60299160b..b50628d6b 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0 + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). 
*/ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2a9d2d031..19b82b4f3 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char { + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) ++#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index bfb044f5d..307fa3d67 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,6 +248,26 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + ++#pragma GCC target ("arch=armv8.2-a") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.2-a+rcpc") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ ++#pragma GCC target ("+norcpc") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.3-a") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ + int + foo (int a) + { +-- +2.33.0 +
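Since the patch makes `__ARM_FEATURE_RCPC` visible to the preprocessor, user code can guard hand-written RCpc instructions on it, which is the inline-assembly use case the ACLE pull request cites. A hedged sketch; the function and the fallback path are ours:

```c
static inline int
load_acquire_int (const int *p)
{
#ifdef __ARM_FEATURE_RCPC
  int v;
  /* LDAPR: load-acquire with the weaker RCpc ordering.  */
  __asm__ volatile ("ldapr %w0, [%1]" : "=r" (v) : "r" (p) : "memory");
  return v;
#else
  /* Without RCPC, fall back to a normal acquire load (LDAR).  */
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
#endif
}
```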
_service:tar_scm:0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch
Added
@@ -0,0 +1,126 @@ +From 1ccf16353b2be4308c79f3b011cb800bfa6f94f4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 10:46:51 +0800 +Subject: PATCH 126/188 LoongArch: Enable explicit reloc for extreme TLS + GD/LD with -mexplicit-relocs=auto. + +Binutils does not support relaxation using four instructions to obtain +symbol addresses + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + When the code model of the symbol is extreme and -mexplicit-relocs=auto, + the macro instruction loading symbol address is not applicable. + (loongarch_call_tls_get_addr): Adjust code. + (loongarch_legitimize_tls_address): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New test. + * gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 19 +++++++++---------- + .../explicit-relocs-extreme-auto-tls-ld-gd.c | 5 +++++ + .../explicit-relocs-medium-auto-tls-ld-gd.c | 5 +++++ + 3 files changed, 19 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cfe5bfb2..84b949021 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1968,6 +1968,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + ++ /* The linker don't know how to relax accesses in extreme code model. */ ++ if (loongarch_symbol_extreme_p (type)) ++ return true; ++ + switch (type) + { + case SYMBOL_TLS_IE: +@@ -1979,11 +1983,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: +- /* The linker don't know how to relax GOT accesses in extreme +- code model. */ +- if (TARGET_CMODEL_EXTREME) +- return true; +- + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -2772,7 +2771,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) ++ if (loongarch_explicit_relocs_p (type)) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2805,7 +2804,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_MEDIUM: + { + rtx reg = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); + rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +@@ -2841,7 +2840,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_NORMAL: + case CMODEL_MEDIUM: + { +- if (TARGET_EXPLICIT_RELOCS) ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { + rtx high = gen_reg_rtx (Pmode); + loongarch_emit_move (high, +@@ -2935,7 +2934,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +@@ -2992,7 +2991,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..35bd4570a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */ ++/* { dg-final { scan-assembler-not "la.tls.\lg\d" { target tls_native } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..47bffae8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler-not "la.global" { target tls_native } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 +
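The new tests include a shared source file (`explicit-relocs-auto-tls-ld-gd.c`) that is not shown in this revision; a self-contained sketch of the same check, with the DejaGnu directives taken from the added test and the TLS access body assumed by us:

```c
/* { dg-do compile } */
/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */

__thread int x __attribute__ ((tls_model ("global-dynamic")));

int
get_x (void)
{
  return x;
}

/* Expect no la.tls.* macro in the output: under the extreme model the
   linker cannot relax the four-instruction address sequence, so
   explicit relocs must be used even with -mexplicit-relocs=auto.  */
```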
_service:tar_scm:0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
Added
@@ -0,0 +1,29 @@
+From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Tue, 20 Feb 2024 11:03:47 +0800
+Subject: [PATCH 028/157] [Backport][SME] Add Ampere-1 and Ampere-1A core
+ definition in aarch64-cores.def
+
+From commit db2f5d661239737157cf131de7d4df1c17d8d88d and
+590a06afbf0e96813b5879742f38f3665512c854
+---
+ gcc/config/aarch64/aarch64-cores.def | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index b50628d6b..f069c81cf 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu
+ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1)
+ 
+ /* Ampere Computing ('\xC0') cores. */
+-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1)
++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1)
++AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1)
+ /* Do not swap around "emag" and "xgene1",
+    this order is required to handle variant correctly. */
+ AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3)
+-- 
+2.33.0
+
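A quick smoke test for the new core entry; the example is ours, and the `cpu=` pragma form is assumed from GCC's documented aarch64 target options:

```c
#pragma GCC target ("cpu=ampere1a")

int
probe (void)
{
  return 0;   /* also reachable via -mcpu=ampere1a on the command line */
}
```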
_service:tar_scm:0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch
Added
@@ -0,0 +1,72 @@ +From d802fd5eb24bba0c274edeea5aff33e794927aaa Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 11:14:00 +0800 +Subject: PATCH 127/188 LoongArch: Added support for loading __get_tls_addr + symbol address using call36. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): + Add support for call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++----- + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 5 +++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 84b949021..0050813df 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2803,17 +2803,27 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_MEDIUM: + { +- rtx reg = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); +- rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +- loongarch_tls_symbol, +- const0_rtx); +- insn = emit_call_insn (call); ++ rtx call; ++ ++ if (HAVE_AS_SUPPORT_CALL36) ++ call = gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx); ++ else ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_insn (gen_pcalau12i (Pmode, reg, ++ loongarch_tls_symbol)); ++ call = gen_call_value_internal_1 (Pmode, v0, reg, ++ loongarch_tls_symbol, ++ const0_rtx); ++ } ++ insn = emit_call_insn (call); + } + else + { ++ rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, loongarch_tls_symbol); + insn = emit_call_insn (gen_call_value_internal (v0, + reg, +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..d1a482083 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && loongarch_call36_support } } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 +
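The intent, restated as a sketch of the added test; the dg directives and the expected instruction come from the patch, while the TLS access body (normally pulled in from the shared `explicit-relocs-auto-tls-ld-gd.c`) is assumed by us:

```c
/* { dg-do compile } */
/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */

__thread int v __attribute__ ((tls_model ("global-dynamic")));

int
get_v (void)
{
  return v;
}

/* With a call36-capable assembler, expect
     pcaddu18i $r1, %call36(__tls_get_addr)
   in place of the pcalau12i-based two-instruction call sequence.  */
```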
_service:tar_scm:0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
Added
@@ -0,0 +1,968 @@ +From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 7 Sep 2022 10:52:04 +0100 +Subject: PATCH 029/157 BackportSME aarch64: Fix +nosimd handling of FPR + moves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6 + +8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other +moves would handle FPR<-zero inefficiently. This is very much a +niche case at the moment, but something like it becomes more +important with SME streaming mode. + +The si, di and vector tests already passed, they're just included for +completeness. + +We're a bit inconsistent about whether alternatives involving FPRs +are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads +and stores are sometimes * and sometimes fp. + +IMO * makes more sense. FPRs should not be used at all without +TARGET_FLOAT, so TARGET_FLOAT represents the base architecture +when FPRs are enabled. I think it's more useful if non-default +arches represent a genuine restriction. + +gcc/ + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Extend + w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves + in that case. Extend w<-r to w<-Z. + (*mov<HFBF:mode>_aarch64): Likewise, but with Y instead of Z. + (*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not + available. + (define_split): Do not apply the floating-point immediate-to-register + split to zeros, even if MOVI is not available. + +gcc/testsuite/ + * gcc.target/aarch64/movqi_1.c: New test. + * gcc.target/aarch64/movhi_1.c: Likewise. + * gcc.target/aarch64/movsi_1.c: Likewise. + * gcc.target/aarch64/movdi_2.c: Likewise. + * gcc.target/aarch64/movti_2.c: Likewise. + * gcc.target/aarch64/movhf_1.c: Likewise. + * gcc.target/aarch64/movsf_1.c: Likewise. + * gcc.target/aarch64/movdf_1.c: Likewise. + * gcc.target/aarch64/movtf_2.c: Likewise. + * gcc.target/aarch64/movv8qi_1.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.md | 38 ++++---- + gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++ + 12 files changed, 729 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7ee26284d..7267a74d6 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1201,7 +1201,7 @@ + + (define_insn "*mov<mode>_aarch64" + (set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") +- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w")) ++ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" + { +@@ -1225,11 +1225,11 @@ + case 7: + return "str\t%<size>1, %0"; + case 8: +- return "umov\t%w0, %1.<v>0"; ++ return TARGET_SIMD ? "umov\t%w0, %1.<v>0" : "fmov\t%w0, %s1"; + case 9: +- return "dup\t%0.<Vallxd>, %w1"; ++ return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1"; + case 10: +- return "dup\t%<Vetype>0, %1.<v>0"; ++ return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>0" : "fmov\t%s0, %s1"; + default: + gcc_unreachable (); + } +@@ -1237,7 +1237,7 @@ + ;; The "mov_imm" type for CNT is just a placeholder. 
+ (set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, + store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") +- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd") ++ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*") + ) + + (define_expand "mov<mode>" +@@ -1399,14 +1399,15 @@ + + (define_insn "*movti_aarch64" + (set (match_operand:TI 0 +- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") ++ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") + (match_operand:TI 1 +- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w")) ++ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w")) + "(register_operand (operands0, TImode) + || aarch64_reg_or_zero (operands1, TImode))" + "@ + # + movi\\t%0.2d, #0 ++ fmov\t%d0, xzr + # + # + mov\\t%0.16b, %1.16b +@@ -1415,11 +1416,11 @@ + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" +- (set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ ++ (set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ + load_16,store_16,store_16,\ + load_16,store_16") +- (set_attr "length" "8,4,8,8,4,4,4,4,4,4") +- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp") ++ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") ++ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp") + ) + + ;; Split a TImode register-register or register-immediate move into +@@ -1458,16 +1459,19 @@ + ) + + (define_insn "*mov<mode>_aarch64" +- (set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") +- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r")) ++ (set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r") ++ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r")) + "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) + || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + movi\\t%0.4h, #0 + fmov\\t%h0, %w1 + dup\\t%w0.4h, %w1 ++ fmov\\t%s0, %w1 + umov\\t%w0, %1.h0 ++ fmov\\t%w0, %s1 + mov\\t%0.h0, %1.h0 ++ fmov\\t%s0, %s1 + fmov\\t%h0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands1, HImode); + ldr\\t%h0, %1 +@@ -1475,9 +1479,10 @@ + ldrh\\t%w0, %1 + strh\\t%w1, %0 + mov\\t%w0, %w1" +- (set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ +- neon_move,f_loads,f_stores,load_4,store_4,mov_reg") +- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*") ++ (set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc, ++ neon_move,fmov,fconsts,neon_move,f_loads,f_stores, ++ load_4,store_4,mov_reg") ++ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*") + ) + + (define_insn "*movsf_aarch64" +@@ -1530,10 +1535,11 @@ + + (define_split + (set (match_operand:GPF_HF 0 "nonimmediate_operand") +- (match_operand:GPF_HF 1 "general_operand")) ++ (match_operand:GPF_HF 1 "const_double_operand")) + "can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands1, <MODE>mode) + && !aarch64_float_const_representable_p (operands1) ++ && !aarch64_float_const_zero_rtx_p (operands1) + && aarch64_float_const_rtx_p (operands1)" + (const_int 0) + { +diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c +new file mode 100644 +index 000000000..a51ded1d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */
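The new mov*_1.c tests (truncated above) follow a common shape; a reconstructed example in that style, with the register names and the pragma assumed from the commit message rather than copied from the test files:

```c
#pragma GCC target ("+nosimd+fp")

/* A 16-bit FPR-to-FPR move: with +nosimd the backend cannot use
   DUP/UMOV, so this used to ICE and should now emit a 32-bit fmov.  */
void
fpr_to_fpr (void)
{
  register unsigned short q0 asm ("q0");
  register unsigned short q1 asm ("q1");
  asm volatile ("" : "=w" (q0));
  q1 = q0;
  asm volatile ("" :: "w" (q1));
}
```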
_service:tar_scm:0128-LoongArch-Don-t-split-the-instructions-containing-re.patch
Added
@@ -0,0 +1,514 @@ +From 45aace43891ccaef756f2f1356edbb0da676629b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 29 Jan 2024 15:20:07 +0800 +Subject: PATCH 128/188 LoongArch: Don't split the instructions containing + relocs for extreme code model. + +The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for +addressing a symbol to be adjacent. So model them as "one large +instruction", i.e. define_insn, with two output registers. The real +address is the sum of these two registers. + +The advantage of this approach is the RTL passes can still use ldx/stx +instructions to skip an addi.d instruction. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (unspec): Add + UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2. + (la_pcrel64_two_parts): New define_insn. + * config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a + typo in the comment. + (loongarch_call_tls_get_addr): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, use la_pcrel64_two_parts for + addressing the TLS symbol and __tls_get_addr. Emit an REG_EQUAL + note to allow CSE addressing __tls_get_addr. + (loongarch_legitimize_tls_address): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address TLS IE symbols with + la_pcrel64_two_parts. + (loongarch_split_symbol): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address symbols with + la_pcrel64_two_parts. + (loongarch_output_mi_thunk): Clean up unreachable code. If + -mcmodel=extreme -mexplicit-relocs={always,auto}, address the MI + thunks with la_pcrel64_two_parts. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-extreme-1.c (dg-options): + Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i + instruction sequences are not reordered by the compiler. + (NOIPA): Disallow interprocedural optimizations. + * gcc.target/loongarch/func-call-extreme-2.c: Remove the content + duplicated from func-call-extreme-1.c, include it instead. + (dg-options): Likewise. + * gcc.target/loongarch/func-call-extreme-3.c (dg-options): + Likewise. + * gcc.target/loongarch/func-call-extreme-4.c (dg-options): + Likewise. + * gcc.target/loongarch/cmodel-extreme-1.c: New test. + * gcc.target/loongarch/cmodel-extreme-2.c: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-1.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-2.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-3.C: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 131 ++++++++++-------- + gcc/config/loongarch/loongarch.md | 20 +++ + .../loongarch/cmodel-extreme-mi-thunk-1.C | 11 ++ + .../loongarch/cmodel-extreme-mi-thunk-2.C | 6 + + .../loongarch/cmodel-extreme-mi-thunk-3.C | 6 + + .../gcc.target/loongarch/cmodel-extreme-1.c | 18 +++ + .../gcc.target/loongarch/cmodel-extreme-2.c | 7 + + .../loongarch/func-call-extreme-1.c | 14 +- + .../loongarch/func-call-extreme-2.c | 29 +--- + .../loongarch/func-call-extreme-3.c | 2 +- + .../loongarch/func-call-extreme-4.c | 2 +- + 11 files changed, 154 insertions(+), 92 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 0050813df..b8f0291ab 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2733,7 +2733,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + return plus_constant (Pmode, reg, offset); + } + +-/* The __tls_get_attr symbol. */ ++/* The __tls_get_addr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry for a TLS access. */ +@@ -2773,20 +2773,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (loongarch_explicit_relocs_p (type)) + { +- /* Split tls symbol to high and low. */ +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +- high = loongarch_force_temporary (tmp, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); +- emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc)); ++ emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2)); + } + else +- emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ { ++ /* Split tls symbol to high and low. 
*/ ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); ++ ++ high = loongarch_force_temporary (tmp, high); ++ emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ } + } + else + emit_insn (loongarch_load_tls (a0, loc, type)); +@@ -2868,22 +2870,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + { + if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, +- loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, +- gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, + loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ loongarch_emit_move ( ++ dest, ++ gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, ++ part1, ++ part2))); ++ ++ /* Put an REG_EQUAL note here to allow CSE (storing ++ part1 + part2, i.e. the address of tls_get_addr into ++ a saved register and use it for multiple TLS ++ accesses). */ ++ rtx sum = gen_rtx_UNSPEC ( ++ Pmode, gen_rtvec (1, loongarch_tls_symbol), ++ UNSPEC_ADDRESS_FIRST ++ + loongarch_classify_symbol (loongarch_tls_symbol)); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); + } + else + emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, +@@ -2946,24 +2954,30 @@ loongarch_legitimize_tls_address (rtx loc) + dest = gen_reg_rtx (Pmode); + if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { +- tmp3 = gen_reg_rtx (Pmode); +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +- high = loongarch_force_temporary (tmp3, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp3 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp3, +- gen_rtx_REG (Pmode, 0), tmp2)); +- emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); +- emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); ++ gcc_assert (la_opt_explicit_relocs ++ != EXPLICIT_RELOCS_NONE); ++ ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, ++ tmp2)); + emit_move_insn (tmp1, + gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, +- high, tmp3))); ++ part1, ++ part2))); + } + else +- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ { ++ tmp3 = gen_reg_rtx (Pmode); ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); ++ ++ high = loongarch_force_temporary (tmp3, high); ++ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
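The point of modelling the four instructions as one insn is easiest to see in the expected output. Below is our annotation of the ABI-mandated sequence, shown in a comment; the reloc operator names follow the LoongArch psABI and are not compiler output we have verified:

```c
int x;

int
get_x (void)
{
  return x;
  /* Expected shape under -mcmodel=extreme -mexplicit-relocs=auto:
       pcalau12i $t0, %pc_hi20(x)
       addi.d    $t1, $zero, %pc_lo12(x)
       lu32i.d   $t1, %pc64_lo20(x)
       lu52i.d   $t1, $t1, %pc64_hi12(x)
       ldx.w     $a0, $t0, $t1    # add folded into the indexed load
     The four relocated instructions stay adjacent, as the ABI requires,
     but RTL passes can still fold the final add into ldx/stx.  */
}
```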
_service:tar_scm:0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
Added
@@ -0,0 +1,83 @@ +From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 20 Oct 2022 10:37:35 +0100 +Subject: PATCH 030/157 BackportSME aarch64: Commonise some folding code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c + +Add an aarch64_sve::gimple_folder helper for folding calls +to integer constants. SME will make more use of this. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (gimple_folder::fold_to_cstu): New member function. + * config/aarch64/aarch64-sve-builtins.cc + (gimple_folder::fold_to_cstu): Define. + * config/aarch64/aarch64-sve-builtins-base.cc + (svcnt_bhwd_impl::fold): Use it. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++------- + gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++ + gcc/config/aarch64/aarch64-sve-builtins.h | 1 + + 3 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c24c05487..56c9d75e7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -516,9 +516,7 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), +- GET_MODE_NUNITS (m_ref_mode)); +- return gimple_build_assign (f.lhs, count); ++ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); + } + + rtx +@@ -553,10 +551,7 @@ public: + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) +- { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), value); +- return gimple_build_assign (f.lhs, count); +- } ++ return f.fold_to_cstu (value); + + return NULL; + } +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index a70e3a6b4..e168c8334 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance) + return call; + } + ++/* Fold the call to constant VAL. */ ++gimple * ++gimple_folder::fold_to_cstu (poly_uint64 val) ++{ ++ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val)); ++} ++ + /* Fold the call to a PTRUE, taking the element size from type suffix 0. */ + gimple * + gimple_folder::fold_to_ptrue () +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 63d1db776..0d130b871 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -500,6 +500,7 @@ public: + tree load_store_cookie (tree); + + gimple *redirect_call (const function_instance &); ++ gimple *fold_to_cstu (poly_uint64); + gimple *fold_to_pfalse (); + gimple *fold_to_ptrue (); + gimple *fold_to_vl_pred (unsigned int); +-- +2.33.0 +
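The helper's effect is easiest to see from the user side: calls such as `svcntb` fold to integer constants when the vector length is known. An illustrative example of ours, not an upstream test:

```c
/* Build sketch: -O2 -march=armv8.2-a+sve -msve-vector-bits=256  */
#include <stdint.h>
#include <arm_sve.h>

uint64_t
bytes_per_vector (void)
{
  /* With a fixed 256-bit vector length this call folds to the
     constant 32 during gimple folding (via fold_to_cstu).  */
  return svcntb ();
}
```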
_service:tar_scm:0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch
Added
@@ -0,0 +1,173 @@ +From 825847768a29ec9d50e01015167002998150cb27 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Fri, 26 Jan 2024 16:41:11 +0800 +Subject: PATCH 129/188 LoongArch: Adjust cost of vector_stmt that match + multiply-add pattern. + +We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r +failed to vectorize effectively. For this reason, we adjust the cost of +128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit +vectorization. +The experimental results show that after the modification, 549.fotonik3d_r +performance can be improved by 9.77% under the 128-bit vectorization option. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New. + (loongarch_vector_costs::add_stmt_cost): Adjust. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/vect-10.f90: New test. +--- + gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++ + gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++ + 2 files changed, 119 insertions(+) + create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90 + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b8f0291ab..526ea0bcb 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4153,6 +4153,37 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + return 1 << ceil_log2 (uf); + } + ++/* Check if assign stmt rhs op comes from a multiply-add operation. */ ++static bool ++loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) ++{ ++ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); ++ if (!assign) ++ return false; ++ tree_code code = gimple_assign_rhs_code (assign); ++ if (code != PLUS_EXPR && code != MINUS_EXPR) ++ return false; ++ ++ auto is_mul_result = &(int i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info ++ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) ++ return false; ++ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); ++ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) ++ return false; ++ ++ return true; ++ }; ++ ++ return is_mul_result (1) || is_mul_result (2); ++} ++ + unsigned + loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, slp_tree, +@@ -4165,6 +4196,23 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + { + int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, + misalign); ++ if (vectype && stmt_info) ++ { ++ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info)); ++ machine_mode mode = TYPE_MODE (vectype); ++ ++ /* We found through testing that this strategy (the stmt that ++ matches the multiply-add pattern) has positive returns only ++ when applied to the 128-bit vector stmt, so this restriction ++ is currently made. 
*/ ++ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign) ++ { ++ if (!vect_is_reduction (stmt_info) ++ && loongarch_multiply_add_p (m_vinfo, stmt_info)) ++ stmt_cost = 0; ++ } ++ } ++ + retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); + m_costswhere += retval; + +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +new file mode 100644 +index 000000000..b85bc2702 +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +@@ -0,0 +1,71 @@ ++! { dg-do compile } ++! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } } ++ ++MODULE material_mod ++ ++IMPLICIT NONE ++ ++integer, parameter :: dfp = selected_real_kind (13, 99) ++integer, parameter :: rfp = dfp ++ ++PUBLIC Mat_updateE, iepx, iepy, iepz ++ ++PRIVATE ++ ++integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz ++real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz ++integer :: imin, jmin, kmin ++integer, dimension (6) :: Exsize ++integer, dimension (6) :: Eysize ++integer, dimension (6) :: Ezsize ++integer, dimension (6) :: Hxsize ++integer, dimension (6) :: Hysize ++integer, dimension (6) :: Hzsize ++ ++CONTAINS ++ ++SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez) ++ ++integer, intent (in) :: nx, ny, nz ++ ++real (kind = rfp), intent (inout), & ++ dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex ++real (kind = rfp), intent (inout), & ++ dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey ++real (kind = rfp), intent (inout), & ++ dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez ++real (kind = rfp), intent (in), & ++ dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx ++real (kind = rfp), intent (in), & ++ dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize (6)) :: Hy ++real (kind = rfp), intent (in), & ++ dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz ++ ++integer :: i, j, k, mp ++ ++do k = kmin, nz ++ do j = jmin, ny ++ do i = imin, nx ++ mp = iepx (i, j, k) ++ Ex (i, j, k) = Ex (i, j, k) + & ++ Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + & ++ Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k)) ++ ++ mp = iepy (i, j, k) ++ Ey (i, j, k) = Ey (i, j, k) + & ++ Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + & ++ Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k )) ++ ++ mp = iepz (i, j, k) ++ Ez (i, j, k) = Ez (i, j, k) + & ++ Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + & ++ Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k)) ++ end do ++ end do ++end do ++ ++END SUBROUTINE mat_updateE ++ ++END MODULE material_mod ++ ++! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } } +-- +2.43.0 +
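The new test is Fortran; a C analogue (our sketch) of the pattern whose 128-bit vector_stmt cost is zeroed:

```c
/* Build sketch: -O3 -mlsx.  The PLUS feeds on a MULT result, so
   loongarch_multiply_add_p matches and the statement is costed as a
   fused multiply-add rather than two separate vector operations.  */
void
madd (double *restrict a, const double *restrict b,
      const double *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    a[i] += b[i] * c[i];
}
```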
_service:tar_scm:0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
Added
@@ -0,0 +1,49 @@ +From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 7 Nov 2023 15:22:57 +0000 +Subject: PATCH 031/157 BackportSME aarch64: Add a %Z operand modifier + for SVE registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a + +This patch adds a %Z operand modifier that prints registers as SVE z +registers. The SME patches need this, but so do Tamar's patches. +I'm separating this out to unblock those. + +We should probably document the wxbhsdqZ modifiers as +user-facing, but doing that for all of them is a separate patch. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z + modifier for SVE registers. +--- + gcc/config/aarch64/aarch64.cc | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 3e83e48ec..fd1114b52 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11901,6 +11901,10 @@ sizetochar (int size) + 'N': Take the duplicated element in a vector constant + and print the negative of it in decimal. + 'b/h/s/d/q': Print a scalar FP/SIMD register name. ++ 'Z': Same for SVE registers. ('z' was already taken.) ++ Note that it is not necessary to use %Z for operands ++ that have SVE modes. The convention is to use %Z ++ only for non-SVE (or potentially non-SVE) modes. + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. +@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) + case 's': + case 'd': + case 'q': ++ case 'Z': ++ code = TOLOWER (code); + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); +-- +2.33.0 +
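A hand-written illustration of the modifier; the example is ours, requires an SVE-enabled `-march`, and is meant only to show the printing behaviour, not a recommended idiom:

```c
#include <stdint.h>

uint64_t
zeroed (void)
{
  uint64_t r;
  /* The operand has DImode, a non-SVE mode, so plain %0 would print a
     d-register name; %Z0 prints the same register as z<n>.  */
  asm ("mov %Z0.d, #0" : "=w" (r));
  return r;
}
```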
_service:tar_scm:0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch
Added
@@ -0,0 +1,113 @@ +From 99a48268961f05e87f4f9d6f3f22903869f50af7 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 24 Jan 2024 17:19:32 +0800 +Subject: PATCH 130/188 LoongArch: Fix incorrect return type for + frecipe/frsqrte intrinsic functions + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h + (__frecipe_s): Update function return type. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-intrinsic.c: New test. +--- + gcc/config/loongarch/larchintrin.h | 16 +++++----- + .../loongarch/larch-frecipe-intrinsic.c | 30 +++++++++++++++++++ + 2 files changed, 38 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 22035e767..6582dfe49 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -336,38 +336,38 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + #ifdef __loongarch_frecipe + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- __builtin_loongarch_frecipe_s ((float) _1); ++ return (float) __builtin_loongarch_frecipe_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. */ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- __builtin_loongarch_frecipe_d ((double) _1); ++ return (double) __builtin_loongarch_frecipe_d ((double) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- __builtin_loongarch_frsqrte_s ((float) _1); ++ return (float) __builtin_loongarch_frsqrte_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. 
*/ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- __builtin_loongarch_frsqrte_d ((double) _1); ++ return (double) __builtin_loongarch_frsqrte_d ((double) _1); + } + #endif + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +new file mode 100644 +index 000000000..6ce2bde0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +@@ -0,0 +1,30 @@ ++/* Test intrinsics for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe -O2" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++#include <larchintrin.h> ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __frsqrte_d (_1); ++} +-- +2.43.0 +
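The practical consequence, in an example of ours: the result of the intrinsic can now be consumed directly, where the old `void` prototypes silently discarded it.

```c
#include <larchintrin.h>

/* Needs -mfrecipe; mirrors the added test at a smaller scale.  */
float
approx_recip (float x)
{
  return __frecipe_s (x);
}
```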
_service:tar_scm:0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
Added
@@ -0,0 +1,104 @@ +From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 5 Nov 2023 18:28:46 +0000 +Subject: PATCH 032/157 BackportSME mode-switching: Remove unused bbnum + field + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133 + +seginfo had an unused bbnum field, presumably dating from before +BB information was attached directly to insns. + +gcc/ + * mode-switching.cc: Remove unused forward references. + (seginfo): Remove bbnum. + (new_seginfo): Remove associated argument. + (optimize_mode_switching): Update calls accordingly. +--- + gcc/mode-switching.cc | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6e3f1dc65..4cf8f03a0 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. If not see + MODE is the mode this insn must be executed in. + INSN_PTR is the insn to be executed (may be the note that marks the + beginning of a basic block). +- BBNUM is the flow graph basic block this insn occurs in. + NEXT is the next insn in the same basic block. */ + struct seginfo + { + int mode; + rtx_insn *insn_ptr; +- int bbnum; + struct seginfo *next; + HARD_REG_SET regs_live; + }; +@@ -84,11 +82,6 @@ struct bb_info + int mode_in; + }; + +-static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET); +-static void add_seginfo (struct bb_info *, struct seginfo *); +-static void reg_dies (rtx, HARD_REG_SET *); +-static void reg_becomes_live (rtx, const_rtx, void *); +- + /* Clear ode I from entity J in bitmap B. */ + #define clear_mode_bit(b, j, i) \ + bitmap_clear_bit (b, (j * max_num_modes) + i) +@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) + } + + /* Allocate a new BBINFO structure, initialized with the MODE, INSN, +- and basic block BB parameters. ++ and REGS_LIVE parameters. + INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty + basic block; that allows us later to insert instructions in a FIFO-like + manner. */ + + static struct seginfo * +-new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) ++new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET ®s_live) + { + struct seginfo *ptr; + +@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) + ptr = XNEW (struct seginfo); + ptr->mode = mode; + ptr->insn_ptr = insn; +- ptr->bbnum = bb; + ptr->next = NULL; + ptr->regs_live = regs_live; + return ptr; +@@ -604,7 +596,7 @@ optimize_mode_switching (void) + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); + if (ins_pos != BB_END (bb)) + ins_pos = NEXT_INSN (ins_pos); +- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now); ++ ptr = new_seginfo (no_mode, ins_pos, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transpbb->index, j, i); +@@ -622,7 +614,7 @@ optimize_mode_switching (void) + { + any_set_required = true; + last_mode = mode; +- ptr = new_seginfo (mode, insn, bb->index, live_now); ++ ptr = new_seginfo (mode, insn, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transpbb->index, j, i); +@@ -651,7 +643,7 @@ optimize_mode_switching (void) + mark the block as nontransparent. 
*/ + if (!any_set_required) + { +- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now); ++ ptr = new_seginfo (no_mode, BB_END (bb), live_now); + add_seginfo (info + bb->index, ptr); + if (last_mode != no_mode) + for (i = 0; i < no_mode; i++) +-- +2.33.0 +
_service:tar_scm:0131-LoongArch-Fix-an-ODR-violation.patch
Added
@@ -0,0 +1,60 @@
+From 89ebd7012ecf49c60bad8dd018e0aa573b58844b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 2 Feb 2024 05:37:38 +0800
+Subject: [PATCH 131/188] LoongArch: Fix an ODR violation
+
+When bootstrapping GCC 14 with --with-build-config=bootstrap-lto, an ODR
+violation is detected:
+
+    ../../gcc/config/loongarch/loongarch-opts.cc:57: warning:
+    'abi_minimal_isa' violates the C++ One Definition Rule [-Wodr]
+    57 | abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES];
+    ../../gcc/config/loongarch/loongarch-def.cc:186: note:
+    'abi_minimal_isa' was previously declared here
+    186 | abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>,
+    ../../gcc/config/loongarch/loongarch-def.cc:186: note:
+    code may be misoptimized unless '-fno-strict-aliasing' is used
+
+Fix it by adding a proper declaration of abi_minimal_isa into
+loongarch-def.h and removing the ODR-violating local declaration in
+loongarch-opts.cc.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.h (abi_minimal_isa): Declare.
+	* config/loongarch/loongarch-opts.cc (abi_minimal_isa): Remove
+	the ODR-violating local declaration.
+---
+ gcc/config/loongarch/loongarch-def.h   | 3 +++
+ gcc/config/loongarch/loongarch-opts.cc | 2 --
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index 28da3ae5f..fdcf43fc7 100644
+--- a/gcc/config/loongarch/loongarch-def.h
++++ b/gcc/config/loongarch/loongarch-def.h
+@@ -203,5 +203,8 @@ extern loongarch_def_array<loongarch_align, N_TUNE_TYPES>
+   loongarch_cpu_align;
+ extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES>
+   loongarch_cpu_rtx_cost_data;
++extern loongarch_def_array<
++  loongarch_def_array<loongarch_isa, N_ABI_EXT_TYPES>,
++  N_ABI_BASE_TYPES> abi_minimal_isa;
+ 
+ #endif /* LOONGARCH_DEF_H */
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index a2b069d83..2ea3972d1 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -53,8 +53,6 @@ static const int tm_multilib_list[] = { TM_MULTILIB_LIST };
+ static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 };
+ 
+ #define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext])
+-extern "C" const struct loongarch_isa
+-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES];
+ 
+ static inline int
+ is_multilib_enabled (struct loongarch_abi abi)
+-- 
+2.43.0
+
View file
_service:tar_scm:0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
Added
@@ -0,0 +1,311 @@
+From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:54 +0000
+Subject: [PATCH 033/157] [Backport][SME] mode-switching: Tweak the macro/hook
+ documentation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d
+
+I found the documentation for the mode-switching macros/hooks
+a bit hard to follow at first.  This patch tries to add the
+information that I think would have made it easier to understand.
+
+Of course, documentation preferences are personal, and so I could
+be changing something that others understood to something that
+seems impenetrable.
+
+Some notes on specific changes:
+
+- "in an optimizing compilation" didn't seem accurate; the pass
+  is run even at -O0, and often needs to be for correctness.
+
+- "at run time" meant when the compiler was run, rather than when
+  the compiled code was run.
+
+- Removing the list of optional macros isn't a clarification,
+  but it means that upcoming patches don't create an absurdly
+  long list.
+
+- I don't really understand the purpose of TARGET_MODE_PRIORITY,
+  so I mostly left that alone.
+
+gcc/
+	* target.def: Tweak documentation of mode-switching hooks.
+	* doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation.
+	(NUM_MODES_FOR_MODE_SWITCHING): Likewise.
+	* doc/tm.texi: Regenerate.
+---
+ gcc/doc/tm.texi    | 69 ++++++++++++++++++++++++++++------------------
+ gcc/doc/tm.texi.in | 26 +++++++++--------
+ gcc/target.def     | 43 ++++++++++++++++++-----------
+ 3 files changed, 84 insertions(+), 54 deletions(-)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 851d31c18..553aa4cf2 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations:
+ 
+ @defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
+ Define this macro if the port needs extra instructions inserted for mode
+-switching in an optimizing compilation.
++switching.
+ 
+ For an example, the SH4 can perform both single and double precision
+ floating point operations, but to perform a single precision operation,
+@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to
+ be inserted before reload, i.e.@: you cannot put this into instruction emitting
+ or @code{TARGET_MACHINE_DEPENDENT_REORG}.
+ 
+-You can have multiple entities that are mode-switched, and select at run time
+-which entities actually need it.  @code{OPTIMIZE_MODE_SWITCHING} should
+-return nonzero for any @var{entity} that needs mode-switching.
++You can have multiple entities that are mode-switched, some of which might
++only be needed conditionally.  The entities are identified by their index
++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
++of the initializer determining the number of entities.
++
++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
++that needs mode-switching.
++
+ If you define this macro, you also have to define
+ @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
+ @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
+-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
+-are optional.
++The other macros in this section are optional.
+ @end defmac
+ 
+ @defmac NUM_MODES_FOR_MODE_SWITCHING
+ If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
+ initializer for an array of integers.  Each initializer element
+ N refers to an entity that needs mode switching, and specifies the number
+-of different modes that might need to be set for this entity.
+-The position of the initializer in the initializer---starting counting at
++of different modes that are defined for that entity.
++The position of the element in the initializer---starting counting at
+ zero---determines the integer that is used to refer to the mode-switched
+ entity in question.
+-In macros that take mode arguments / yield a mode result, modes are
+-represented as numbers 0 @dots{} N @minus{} 1.  N is used to specify that no mode
+-switch is needed / supplied.
++Modes are represented as numbers 0 @dots{} N @minus{} 1.
++In mode arguments and return values, N either represents an unknown
++mode or ``no mode'', depending on context.
+ @end defmac
+ 
+ @deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live})
+ Generate one or more insns to set @var{entity} to @var{mode}.
+ @var{hard_reg_live} is the set of hard registers live at the point where
+ the insn(s) are to be inserted.  @var{prev_moxde} indicates the mode
+-to switch from.  Sets of a lower numbered entity will be emitted before
++to switch from, or is the number of modes if the previous mode is not
++known.  Sets of a lower numbered entity will be emitted before
+ sets of a higher numbered entity to a mode of the same or lower priority.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
+ @var{entity} is an integer specifying a mode-switched entity.
+-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro
+-to return an integer value not larger than the corresponding element
+-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}
+-must be switched into prior to the execution of @var{insn}.
++If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
++to return the mode that @var{entity} must be switched into prior to the
++execution of @var{insn}, or the number of modes if @var{insn} has no
++such requirement.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
+ @var{entity} is an integer specifying a mode-switched entity.
+-If this macro is defined, it is evaluated for every @var{insn} during mode
+-switching.  It determines the mode that an insn results
+-in (if different from the incoming mode).
++If this hook is defined, it is evaluated for every @var{insn} during mode
++switching.  It returns the mode that @var{entity} is in after @var{insn}
++has been executed.  @var{mode} is the mode that @var{entity} was in
++before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
++
++@var{mode} is equal to the number of modes defined for @var{entity}
++if the mode before @var{insn} is unknown.  The hook should likewise return
++the number of modes if it does not know what mode @var{entity} has after
++@var{insn}.
++
++Not defining the hook is equivalent to returning @var{mode}.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
+-If this macro is defined, it is evaluated for every @var{entity} that
+-needs mode switching.  It should evaluate to an integer, which is a mode
+-that @var{entity} is assumed to be switched to at function entry.
++If this hook is defined, it is evaluated for every @var{entity} that
++needs mode switching.  It should return the mode that @var{entity} is
++guaranteed to be in on entry to the function, or the number of modes
++if there is no such guarantee.
+ If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}
+ must be defined.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity})
+-If this macro is defined, it is evaluated for every @var{entity} that
+-needs mode switching.  It should evaluate to an integer, which is a mode
+-that @var{entity} is assumed to be switched to at function exit.
++If this hook is defined, it is evaluated for every @var{entity} that
++needs mode switching.  It should return the mode that @var{entity} must
++be in on return from the function, or the number of modes if there is no
++such requirement.
+ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
+ must be defined.
+ @end deftypefn
+ 
+ @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
+-This macro specifies the order in which modes for @var{entity}
++This hook specifies the order in which modes for @var{entity}
+ are processed.  0 is the highest priority,
+ @code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.
+-The value of the macro should be an integer designating a mode
++The hook returns an integer designating a mode
+ for @var{entity}.  For any fixed @var{entity}, @code{mode_priority}
+ (@var{entity}, @var{n}) shall be a bijection in 0 @dots{}
+ @code{num_modes_for_mode_switching[@var{entity}] - 1}.
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index ac95cdf7a..9ec11b15c 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations:
+ 
+ @defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
+ Define this macro if the port needs extra instructions inserted for mode
+-switching in an optimizing compilation.
++switching.
+ 
+ For an example, the SH4 can perform both single and double precision
+ floating point operations, but to perform a single precision operation,
+@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to
+ be inserted before reload, i.e.@: you cannot put this into instruction emitting
+ or @code{TARGET_MACHINE_DEPENDENT_REORG}.
+ 
+-You can have multiple entities that are mode-switched, and select at run time
+-which entities actually need it.  @code{OPTIMIZE_MODE_SWITCHING} should
+-return nonzero for any @var{entity} that needs mode-switching.
++You can have multiple entities that are mode-switched, some of which might
++only be needed conditionally.  The entities are identified by their index
++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
++of the initializer determining the number of entities.
++
++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
++that needs mode-switching.
++
+ If you define this macro, you also have to define
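The convention the rewritten documentation spells out, namely that valid modes are 0 ... N-1 and the value N itself doubles as "unknown" or "no mode", is easy to see in a toy re-implementation. The following self-contained C++ sketch (invented names, no GCC internals) walks one block of "insns" and inserts a mode-set wherever the needed mode differs from the current one:

#include <cstdio>
#include <vector>

constexpr int N = 2;         // number of modes for this entity
constexpr int no_mode = N;   // N itself means "no mode" / "unknown"

struct insn { int needed; }; // what TARGET_MODE_NEEDED would return

int main ()
{
  std::vector<insn> block = { {no_mode}, {1}, {1}, {0}, {no_mode} };
  int cur = no_mode;         // mode on entry to the block is unknown
  for (std::size_t i = 0; i < block.size (); ++i)
    {
      int m = block[i].needed;
      if (m != no_mode && m != cur)
        {
          std::printf ("insert set-to-mode-%d before insn %zu\n", m, i);
          cur = m;           // with no after hook, the mode persists
        }
    }
}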
View file
_service:tar_scm:0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch
Added
@@ -0,0 +1,359 @@
+From f4a447bff86c7f5598a7461e353a3c6f4a101ed4 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Fri, 2 Feb 2024 09:42:28 +0800
+Subject: [PATCH 132/188] LoongArch: testsuite: Fix
+ gcc.dg/vect/vect-reduc-mul_{1,2}.c FAIL.
+
+This FAIL was introduced from r14-6908.  The reason is that when merging
+constant vector permutation implementations, the 128-bit matching situation
+was not fully considered.  In fact, the expansion of 128-bit vectors after
+merging only supports value-based 4 elements set shuffle, so this time is a
+complete implementation of the entire 128-bit vector constant permutation,
+and some structural adjustments have also been made to the code.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_expand_vselect): Adjust.
+	(loongarch_expand_vselect_vconcat): Ditto.
+	(loongarch_try_expand_lsx_vshuf_const): New, use vshuf to implement
+	all 128-bit constant permutation situations.
+	(loongarch_expand_lsx_shuffle): Adjust and rename function name.
+	(loongarch_is_imm_set_shuffle): Renamed function name.
+	(loongarch_expand_vec_perm_even_odd): Function forward declaration.
+	(loongarch_expand_vec_perm_even_odd_1): Add implement for 128-bit
+	extract-even and extract-odd permutations.
+	(loongarch_is_odd_extraction): Delete.
+	(loongarch_is_even_extraction): Ditto.
+	(loongarch_expand_vec_perm_const): Adjust.
+---
+ gcc/config/loongarch/loongarch.cc | 218 ++++++++++++++++++++++--------
+ 1 file changed, 163 insertions(+), 55 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 526ea0bcb..a0e0906af 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8025,7 +8025,8 @@ struct expand_vec_perm_d
+ 
+ static bool
+ loongarch_expand_vselect (rtx target, rtx op0,
+-			  const unsigned char *perm, unsigned nelt)
++			  const unsigned char *perm, unsigned nelt,
++			  bool testing_p)
+ {
+   rtx rperm[MAX_VECT_LEN], x;
+   rtx_insn *insn;
+@@ -8044,6 +8045,9 @@ loongarch_expand_vselect (rtx target, rtx op0,
+       remove_insn (insn);
+       return false;
+     }
++
++  if (testing_p)
++    remove_insn (insn);
+   return true;
+ }
+ 
+@@ -8051,7 +8055,8 @@ loongarch_expand_vselect (rtx target, rtx op0,
+ 
+ static bool
+ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+-				  const unsigned char *perm, unsigned nelt)
++				  const unsigned char *perm, unsigned nelt,
++				  bool testing_p)
+ {
+   machine_mode v2mode;
+   rtx x;
+@@ -8059,7 +8064,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+   if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
+     return false;
+   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
+-  return loongarch_expand_vselect (target, x, perm, nelt);
++  return loongarch_expand_vselect (target, x, perm, nelt, testing_p);
+ }
+ 
+ static tree
+@@ -8315,11 +8320,87 @@ loongarch_set_handled_components (sbitmap components)
+ #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+ #undef TARGET_ASM_ALIGNED_DI_OP
+ #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
++
++/* Use the vshuf instruction to implement all 128-bit constant vector
++   permuatation.  */
++
++static bool
++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
++{
++  int i;
++  rtx target, op0, op1, sel, tmp;
++  rtx rperm[MAX_VECT_LEN];
++
++  if (GET_MODE_SIZE (d->vmode) == 16)
++    {
++      target = d->target;
++      op0 = d->op0;
++      op1 = d->one_vector_p ? d->op0 : d->op1;
++
++      if (GET_MODE (op0) != GET_MODE (op1)
++	  || GET_MODE (op0) != GET_MODE (target))
++	return false;
++
++      if (d->testing_p)
++	return true;
++
++      for (i = 0; i < d->nelt; i += 1)
++	rperm[i] = GEN_INT (d->perm[i]);
++
++      if (d->vmode == E_V2DFmode)
++	{
++	  sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
++	  tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	}
++      else if (d->vmode == E_V4SFmode)
++	{
++	  sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
++	  tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	}
++      else
++	{
++	  sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
++	  emit_move_insn (d->target, sel);
++	}
++
++      switch (d->vmode)
++	{
++	case E_V2DFmode:
++	  emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
++	  break;
++	case E_V2DImode:
++	  emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
++	  break;
++	case E_V4SFmode:
++	  emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
++	  break;
++	case E_V4SImode:
++	  emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
++	  break;
++	case E_V8HImode:
++	  emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
++	  break;
++	case E_V16QImode:
++	  emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
++	  break;
++	default:
++	  break;
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Construct (set target (vec_select op0 (parallel selector))) and
+-   return true if that's a valid instruction in the active ISA.  */
++   return true if that's a valid instruction in the active ISA.
++   In fact, it matches the special constant vector with repeated
++   4-element sets.  */
+ 
+ static bool
+-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
++loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d)
+ {
+   rtx x, elts[MAX_VECT_LEN];
+   rtvec v;
+@@ -8338,6 +8419,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
+   if (!loongarch_const_vector_shuffle_set_p (x, d->vmode))
+     return false;
+ 
++  if (d->testing_p)
++    return true;
++
+   x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
+   x = gen_rtx_SET (d->target, x);
+ 
+@@ -8350,6 +8434,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
+   return true;
+ }
+ 
++static bool
++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *);
++
++/* Try to match and expand all kinds of 128-bit const vector permutation
++   cases.  */
++
++static bool
++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
++{
++  if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16)
++    return false;
++
++  if (loongarch_is_imm_set_shuffle (d))
++    return true;
++
++  if (loongarch_expand_vec_perm_even_odd (d))
++    return true;
++
++  return loongarch_try_expand_lsx_vshuf_const (d);
++}
++
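All of the routines added or renamed above follow the vec_perm_const contract in which d->testing_p asks only whether the permutation could be expanded, without emitting anything. A hypothetical, self-contained C++ reduction of that idiom; the perm_desc struct is invented, and the real expand_vec_perm_d carries much more state:

#include <cstdio>

struct perm_desc { int nelt; bool testing_p; };

static bool
try_expand_perm (perm_desc *d)
{
  if (d->nelt != 2 && d->nelt != 4)   // cheap structural checks first
    return false;
  if (d->testing_p)
    return true;                      // caller only asked "can you?"
  std::printf ("emitting a %d-element shuffle\n", d->nelt);
  return true;
}

int main ()
{
  perm_desc query  = { 4, true };     // dry run: nothing is generated
  perm_desc expand = { 4, false };    // real run: instructions emitted
  return try_expand_perm (&query) && try_expand_perm (&expand) ? 0 : 1;
}

Keeping the validity test and the expansion in one function, as the patch does, ensures the two can never drift apart.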
View file
_service:tar_scm:0133-Backport-SME-mode-switching-Add-note-problem.patch
Added
@@ -0,0 +1,35 @@
+From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:55 +0000
+Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b
+
+optimize_mode_switching uses REG_DEAD notes to track register
+liveness, but it failed to tell DF to calculate up-to-date notes.
+
+Noticed by inspection.  I don't have a testcase that fails
+because of this.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Call
+	df_note_add_problem.
+---
+ gcc/mode-switching.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4cf8f03a0..2a9f98793 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -540,6 +540,7 @@ optimize_mode_switching (void)
+	pre_exit = create_pre_exit (n_entities, entity_map, num_modes);
+     }
+ 
++  df_note_add_problem ();
+   df_analyze ();
+ 
+   /* Create the bitmap vectors.  */
+-- 
+2.33.0
+
View file
_service:tar_scm:0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch
Added
@@ -0,0 +1,72 @@
+From 6364467c68ac1ee2b54b866f462fb670a43029fa Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 2 Feb 2024 08:51:08 +0800
+Subject: [PATCH 133/188] LoongArch: Avoid out-of-bounds access in
+ loongarch_symbol_insns
+
+We call loongarch_symbol_insns with mode = MAX_MACHINE_MODE sometimes.
+But in loongarch_symbol_insns:
+
+    if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
+      return 0;
+
+And LSX_SUPPORTED_MODE_P is defined as:
+
+    #define LSX_SUPPORTED_MODE_P(MODE) \
+      (ISA_HAS_LSX \
+       && GET_MODE_SIZE (MODE) == UNITS_PER_LSX_REG ... ...
+
+GET_MODE_SIZE is expanded to a call to mode_to_bytes, which is defined:
+
+    ALWAYS_INLINE poly_uint16
+    mode_to_bytes (machine_mode mode)
+    {
+    #if GCC_VERSION >= 4001
+      return (__builtin_constant_p (mode)
+	      ? mode_size_inline (mode) : mode_size[mode]);
+    #else
+      return mode_size[mode];
+    #endif
+    }
+
+There is an assertion in mode_size_inline:
+
+    gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);
+
+Note that NUM_MACHINE_MODES = MAX_MACHINE_MODE (emitted by genmodes.cc),
+thus if __builtin_constant_p (mode) is evaluated true (it happens when
+GCC is bootstrapped with LTO+PGO), the assertion will be triggered and
+cause an ICE.  OTOH if __builtin_constant_p (mode) is evaluated false,
+mode_size[mode] is still an out-of-bound array access (the length of
+the mode_size array is NUM_MACHINE_MODES).
+
+So we shouldn't call LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P with
+MAX_MACHINE_MODE in loongarch_symbol_insns.  This is very similar to a
+MIPS bug PR98491 fixed by me about 3 years ago.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_symbol_insns): Do not
+	use LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P if mode is
+	MAX_MACHINE_MODE.
+---
+ gcc/config/loongarch/loongarch.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index a0e0906af..d23b09cc5 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -2004,7 +2004,8 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
+ {
+   /* LSX LD.* and ST.* cannot support loading symbols via an immediate
+      operand.  */
+-  if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
++  if (mode != MAX_MACHINE_MODE
++      && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
+     return 0;
+ 
+   switch (type)
+-- 
+2.43.0
+
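The underlying bug pattern is generic: an enum's past-the-end value used as a "don't care" sentinel must never reach array-indexing code. A small self-contained C++ sketch of the guard; the names and the mode_size table here are illustrative, not GCC's:

#include <cassert>
#include <cstdio>

enum machine_mode_t { M_QI, M_HI, M_SI, M_DI, MAX_MODE };

static const unsigned mode_size[MAX_MODE] = { 1, 2, 4, 8 };

static unsigned
size_of (machine_mode_t mode)
{
  // Guard first: indexing mode_size[MAX_MODE] would read out of
  // bounds, and an assert-checking build would abort on it.
  if (mode == MAX_MODE)
    return 0;
  assert (mode < MAX_MODE);
  return mode_size[mode];
}

int main ()
{
  std::printf ("%u %u\n", size_of (M_SI), size_of (MAX_MODE));
}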
View file
_service:tar_scm:0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
Added
@@ -0,0 +1,90 @@
+From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:55 +0000
+Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadractic list
+ operation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4
+
+add_seginfo chained insn information to the end of a list
+by starting at the head of the list.  This patch avoids the
+quadraticness by keeping track of the tail pointer.
+
+gcc/
+	* mode-switching.cc (add_seginfo): Replace head pointer with
+	a pointer to the tail pointer.
+	(optimize_mode_switching): Update calls accordingly.
+---
+ gcc/mode-switching.cc | 24 ++++++++----------------
+ 1 file changed, 8 insertions(+), 16 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 2a9f98793..6a13951c9 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
+ }
+ 
+ /* Add a seginfo element to the end of a list.
+-   HEAD is a pointer to the list beginning.
++   TAIL is a pointer to the list's null terminator.
+    INFO is the structure to be linked in.  */
+ 
+ static void
+-add_seginfo (struct bb_info *head, struct seginfo *info)
++add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info)
+ {
+-  struct seginfo *ptr;
+-
+-  if (head->seginfo == NULL)
+-    head->seginfo = info;
+-  else
+-    {
+-      ptr = head->seginfo;
+-      while (ptr->next != NULL)
+-	ptr = ptr->next;
+-      ptr->next = info;
+-    }
++  **tail_ptr = info;
++  *tail_ptr = &info->next;
+ }
+ 
+ /* Record in LIVE that register REG died.  */
+@@ -573,6 +564,7 @@ optimize_mode_switching (void)
+      Also compute the initial transparency settings.  */
+   FOR_EACH_BB_FN (bb, cfun)
+     {
++      struct seginfo **tail_ptr = &info[bb->index].seginfo;
+       struct seginfo *ptr;
+       int last_mode = no_mode;
+       bool any_set_required = false;
+@@ -598,7 +590,7 @@ optimize_mode_switching (void)
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+	      ptr = new_seginfo (no_mode, ins_pos, live_now);
+-	      add_seginfo (info + bb->index, ptr);
++	      add_seginfo (&tail_ptr, ptr);
+	      for (i = 0; i < no_mode; i++)
+		clear_mode_bit (transp[bb->index], j, i);
+@@ -616,7 +608,7 @@ optimize_mode_switching (void)
+		  any_set_required = true;
+		  last_mode = mode;
+		  ptr = new_seginfo (mode, insn, live_now);
+-		  add_seginfo (info + bb->index, ptr);
++		  add_seginfo (&tail_ptr, ptr);
+		  for (i = 0; i < no_mode; i++)
+		    clear_mode_bit (transp[bb->index], j, i);
+	        }
+@@ -645,7 +637,7 @@ optimize_mode_switching (void)
+      if (!any_set_required)
+	{
+	  ptr = new_seginfo (no_mode, BB_END (bb), live_now);
+-	  add_seginfo (info + bb->index, ptr);
++	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+	    for (i = 0; i < no_mode; i++)
+	      clear_mode_bit (transp[bb->index], j, i);
+-- 
+2.33.0
+
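The new add_seginfo is the textbook tail-pointer technique: keep a pointer to the list's null terminator so each append is O(1) instead of rescanning from the head. The same idea in a standalone C++ toy (the node type is made up; the two-line body mirrors the patched function exactly):

#include <cstdio>

struct node { int value; node *next; };

static void
append (node ***tail_ptr, node *n)
{
  **tail_ptr = n;          // link n where the null terminator was
  *tail_ptr = &n->next;    // n's next field is the new terminator
}

int main ()
{
  node *head = nullptr;
  node **tail = &head;
  node a{1, nullptr}, b{2, nullptr}, c{3, nullptr};
  append (&tail, &a);
  append (&tail, &b);
  append (&tail, &c);
  for (node *p = head; p; p = p->next)
    std::printf ("%d\n", p->value);
}

Appending n elements head-first costs O(n^2) pointer chases in total; with the tail pointer it is O(n), which is what the commit message means by avoiding the quadraticness.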
View file
_service:tar_scm:0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch
Added
@@ -0,0 +1,122 @@
+From 659b51a6aed60f389009eff1e04645a47e55a45c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 3 Feb 2024 03:16:14 +0800
+Subject: [PATCH 134/188] LoongArch: Fix wrong LSX FP vector negation
+
+We expanded (neg x) to (minus const0 x) for LSX FP vectors, this is
+wrong because -0.0 is not 0 - 0.0.  This causes some Python tests to
+fail when Python is built with LSX enabled.
+
+Use the vbitrevi.{d/w} instructions to simply reverse the sign bit
+instead.  We are already doing this for LASX and now we can unify them
+into simd.md.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md (neg<mode:FLSX>2): Remove the
+	incorrect expand.
+	* config/loongarch/simd.md (simdfmt_as_i): New define_mode_attr.
+	(elmsgnbit): Likewise.
+	(neg<mode:FVEC>2): New define_insn.
+	* config/loongarch/lasx.md (negv4df2, negv8sf2): Remove as they
+	are now instantiated in simd.md.
+---
+ gcc/config/loongarch/lasx.md | 16 ----------------
+ gcc/config/loongarch/lsx.md  | 11 -----------
+ gcc/config/loongarch/simd.md | 18 ++++++++++++++++++
+ 3 files changed, 18 insertions(+), 27 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 946811e1a..38f35bad6 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3028,22 +3028,6 @@
+   [(set_attr "type" "simd_logic")
+    (set_attr "mode" "V8SF")])
+ 
+-(define_insn "negv4df2"
+-  [(set (match_operand:V4DF 0 "register_operand" "=f")
+-	(neg:V4DF (match_operand:V4DF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.d\t%u0,%u1,63"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V4DF")])
+-
+-(define_insn "negv8sf2"
+-  [(set (match_operand:V8SF 0 "register_operand" "=f")
+-	(neg:V8SF (match_operand:V8SF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.w\t%u0,%u1,31"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V8SF")])
+-
+ (define_insn "xvfmadd<mode>4"
+   [(set (match_operand:FLASX 0 "register_operand" "=f")
+	 (fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 612377436..d5aa3f46f 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -728,17 +728,6 @@
+   DONE;
+ })
+ 
+-(define_expand "neg<mode>2"
+-  [(set (match_operand:FLSX 0 "register_operand")
+-	(neg:FLSX (match_operand:FLSX 1 "register_operand")))]
+-  "ISA_HAS_LSX"
+-{
+-  rtx reg = gen_reg_rtx (<MODE>mode);
+-  emit_move_insn (reg, CONST0_RTX (<MODE>mode));
+-  emit_insn (gen_sub<mode>3 (operands[0], reg, operands[1]));
+-  DONE;
+-})
+-
+ (define_expand "lsx_vrepli<mode>"
+   [(match_operand:ILSX 0 "register_operand")
+    (match_operand 1 "const_imm10_operand")]
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 8ac1d75a8..00d4c7831 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -85,12 +85,21 @@
+ (define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
+				   (V4SF "w") (V8SF "w")])
+ 
++;; Suffix for integer mode in LSX or LASX instructions to operating FP
++;; vectors using integer vector operations.
++(define_mode_attr simdfmt_as_i [(V2DF "d") (V4DF "d")
++				(V4SF "w") (V8SF "w")])
++
+ ;; Size of vector elements in bits.
+ (define_mode_attr elmbits [(V2DI "64") (V4DI "64")
+			    (V4SI "32") (V8SI "32")
+			    (V8HI "16") (V16HI "16")
+			    (V16QI "8") (V32QI "8")])
+ 
++;; The index of sign bit in FP vector elements.
++(define_mode_attr elmsgnbit [(V2DF "63") (V4DF "63")
++			     (V4SF "31") (V8SF "31")])
++
+ ;; This attribute is used to form an immediate operand constraint using
+ ;; "const_<bitimm>_operand".
+ (define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
+@@ -457,6 +466,15 @@
+   DONE;
+ })
+ 
++;; FP negation.
++(define_insn "neg<mode>2"
++  [(set (match_operand:FVEC 0 "register_operand" "=f")
++	(neg:FVEC (match_operand:FVEC 1 "register_operand" "f")))]
++  ""
++  "<x>vbitrevi.<simdfmt_as_i>\t%<wu>0,%<wu>1,<elmsgnbit>"
++  [(set_attr "type" "simd_logic")
++   (set_attr "mode" "<MODE>")])
++
+ ; The LoongArch SX Instructions.
+ (include "lsx.md")
+ 
+-- 
+2.43.0
+
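The reason (minus const0 x) is wrong shows up even in scalar code: IEEE 754 negation must map +0.0 to -0.0, but 0.0 - 0.0 yields +0.0, whereas flipping bit 63 (or bit 31 for float), which is what vbitrevi.d/vbitrevi.w do per lane, is exact. A scalar C++ demonstration of the difference; this is an illustration of the semantics, not the vector code:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

static double
neg_by_bitflip (double x)
{
  std::uint64_t bits;
  std::memcpy (&bits, &x, sizeof bits);
  bits ^= std::uint64_t (1) << 63;   // reverse the sign bit (bit 63)
  std::memcpy (&x, &bits, sizeof bits);
  return x;
}

int main ()
{
  double zero = 0.0;
  std::printf ("0.0 - 0.0     = %g\n", 0.0 - zero);            // +0
  std::printf ("bitflip (0.0) = %g\n", neg_by_bitflip (zero)); // -0
  std::printf ("signbits: %d vs %d\n",
	       std::signbit (0.0 - zero),
	       std::signbit (neg_by_bitflip (zero)));
}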
View file
_service:tar_scm:0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
Added
@@ -0,0 +1,136 @@
+From 194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed
+ to the emit hook
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436
+
+optimize_mode_switching passes an entity's current mode (if known)
+to the emit hook.  However, the mode that it passed ignored the
+effect of the after hook.  Instead, the mode for the first emit
+call in a block was taken from the incoming mode, whereas the
+mode for each subsequent emit call was taken from the result
+of the previous call.
+
+The previous pass through the insns already calculated the
+correct mode, so this patch records it in the seginfo structure.
+(There was a 32-bit hole on 64-bit hosts, so this doesn't increase
+the size of the structure for them.)
+
+gcc/
+	* mode-switching.cc (seginfo): Add a prev_mode field.
+	(new_seginfo): Take and initialize the prev_mode.
+	(optimize_mode_switching): Update calls accordingly.
+	Use the recorded modes during the emit phase, rather than
+	computing one on the fly.
+---
+ gcc/mode-switching.cc | 30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 6a13951c9..584cd4f67 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
+    NEXT is the next insn in the same basic block.  */
+ struct seginfo
+ {
++  int prev_mode;
+   int mode;
+   rtx_insn *insn_ptr;
+   struct seginfo *next;
+@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
+   return need_commit;
+ }
+ 
+-/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
+-   and REGS_LIVE parameters.
++/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE,
++   INSN, and REGS_LIVE parameters.
+    INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
+    basic block; that allows us later to insert instructions in a FIFO-like
+    manner.  */
+ 
+ static struct seginfo *
+-new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
++new_seginfo (int prev_mode, int mode, rtx_insn *insn,
++	     const HARD_REG_SET &regs_live)
+ {
+   struct seginfo *ptr;
+ 
+   gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn)
+	       || insn == BB_END (NOTE_BASIC_BLOCK (insn)));
+   ptr = XNEW (struct seginfo);
++  ptr->prev_mode = prev_mode;
+   ptr->mode = mode;
+   ptr->insn_ptr = insn;
+   ptr->next = NULL;
+@@ -589,7 +592,7 @@ optimize_mode_switching (void)
+	      gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+-	      ptr = new_seginfo (no_mode, ins_pos, live_now);
++	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+	      for (i = 0; i < no_mode; i++)
+		clear_mode_bit (transp[bb->index], j, i);
+@@ -605,12 +608,12 @@ optimize_mode_switching (void)
+ 
+	      if (mode != no_mode && mode != last_mode)
+		{
+-		  any_set_required = true;
+-		  last_mode = mode;
+-		  ptr = new_seginfo (mode, insn, live_now);
++		  ptr = new_seginfo (last_mode, mode, insn, live_now);
+		  add_seginfo (&tail_ptr, ptr);
+		  for (i = 0; i < no_mode; i++)
+		    clear_mode_bit (transp[bb->index], j, i);
++		  any_set_required = true;
++		  last_mode = mode;
+		}
+ 
+	      if (targetm.mode_switching.after)
+@@ -636,7 +639,7 @@ optimize_mode_switching (void)
+	 mark the block as nontransparent.  */
+      if (!any_set_required)
+	{
+-	  ptr = new_seginfo (no_mode, BB_END (bb), live_now);
++	  ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+	    for (i = 0; i < no_mode; i++)
+@@ -777,9 +780,9 @@ optimize_mode_switching (void)
+   FOR_EACH_BB_FN (bb, cfun)
+     {
+	struct seginfo *ptr, *next;
+-	int cur_mode = bb_info[j][bb->index].mode_in;
++	struct seginfo *first = bb_info[j][bb->index].seginfo;
+ 
+-	for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
++	for (ptr = first; ptr; ptr = next)
+	  {
+	    next = ptr->next;
+	    if (ptr->mode != no_mode)
+@@ -789,14 +792,15 @@ optimize_mode_switching (void)
+		rtl_profile_for_bb (bb);
+		start_sequence ();
+ 
++		int cur_mode = (ptr == first && ptr->prev_mode == no_mode
++				? bb_info[j][bb->index].mode_in
++				: ptr->prev_mode);
++
+		targetm.mode_switching.emit (entity_map[j], ptr->mode,
+					     cur_mode, ptr->regs_live);
+		mode_set = get_insns ();
+		end_sequence ();
+ 
+-		/* modes kill each other inside a basic block.  */
+-		cur_mode = ptr->mode;
+-
+		/* Insert MODE_SET only if it is nonempty.  */
+		if (mode_set != NULL_RTX)
+		  {
+-- 
+2.33.0
+
View file
_service:tar_scm:0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch
Added
@@ -0,0 +1,30 @@
+From 539eb7639eeda8ea43149032f6aa724e5d46017c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 5 Feb 2024 16:23:20 +0800
+Subject: [PATCH 135/188] LoongArch: Fix wrong return value type of
+ __iocsrrd_h.
+
+gcc/ChangeLog:
+
+	* config/loongarch/larchintrin.h (__iocsrrd_h): Modify the
+	function return value type to unsigned short.
+---
+ gcc/config/loongarch/larchintrin.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
+index 6582dfe49..046e042fd 100644
+--- a/gcc/config/loongarch/larchintrin.h
++++ b/gcc/config/loongarch/larchintrin.h
+@@ -268,7 +268,7 @@ __iocsrrd_b (unsigned int _1)
+ 
+ /* Assembly instruction format:	rd, rj.  */
+ /* Data types in instruction templates:  UHI, USI.  */
+-extern __inline unsigned char
++extern __inline unsigned short
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __iocsrrd_h (unsigned int _1)
+ {
+-- 
+2.43.0
+
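The one-line type change matters because iocsrrd.h reads a 16-bit value: returning it through an 8-bit function type silently discards the high byte. A self-contained C++ sketch of the failure mode, with the raw CSR read faked and all names invented:

#include <cstdio>

static unsigned int raw_iocsr_read (unsigned int) { return 0xabcd; }

static unsigned char  read_h_wrong   (unsigned int a) { return raw_iocsr_read (a); }
static unsigned short read_h_correct (unsigned int a) { return raw_iocsr_read (a); }

int main ()
{
  std::printf ("wrong:   0x%x\n", read_h_wrong (0));    // 0xcd, truncated
  std::printf ("correct: 0x%x\n", read_h_correct (0));  // 0xabcd
}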
View file
_service:tar_scm:0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
Added
@@ -0,0 +1,103 @@
+From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of
+ transparency
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02
+
+For a given block, an entity is either transparent for
+all modes or for none.  Each update to the transparency set
+therefore used a loop like:
+
+      for (i = 0; i < no_mode; i++)
+	clear_mode_bit (transp[bb->index], j, i);
+
+This patch instead starts out with a bit-per-block bitmap
+and updates the main bitmap at the end.
+
+This isn't much of a simplification on its own.  The main
+purpose is to simplify later patches.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Initially
+	compute transparency in a bit-per-block bitmap.
+---
+ gcc/mode-switching.cc | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 584cd4f67..4d2b9e284 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -555,6 +555,8 @@ optimize_mode_switching (void)
+   bitmap_vector_clear (antic, last_basic_block_for_fn (cfun));
+   bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
+ 
++  auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
++
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int e = entity_map[j];
+       struct bb_info *info = bb_info[j];
+       rtx_insn *insn;
+ 
++      bitmap_ones (transp_all);
++
+      /* Determine what the first use (if any) need for a mode of entity E is.
+	 This will be the mode that is anticipatable for this block.
+	 Also compute the initial transparency settings.  */
+@@ -594,8 +598,7 @@ optimize_mode_switching (void)
+		ins_pos = NEXT_INSN (ins_pos);
+	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+-	      for (i = 0; i < no_mode; i++)
+-		clear_mode_bit (transp[bb->index], j, i);
++	      bitmap_clear_bit (transp_all, bb->index);
+	    }
+	}
+ 
+@@ -610,8 +613,7 @@ optimize_mode_switching (void)
+		{
+		  ptr = new_seginfo (last_mode, mode, insn, live_now);
+		  add_seginfo (&tail_ptr, ptr);
+-		  for (i = 0; i < no_mode; i++)
+-		    clear_mode_bit (transp[bb->index], j, i);
++		  bitmap_clear_bit (transp_all, bb->index);
+		  any_set_required = true;
+		  last_mode = mode;
+		}
+@@ -642,8 +644,7 @@ optimize_mode_switching (void)
+	  ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+	  add_seginfo (&tail_ptr, ptr);
+	  if (last_mode != no_mode)
+-	    for (i = 0; i < no_mode; i++)
+-	      clear_mode_bit (transp[bb->index], j, i);
++	    bitmap_clear_bit (transp_all, bb->index);
+	}
+      }
+      if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+	{
+@@ -666,8 +667,7 @@ optimize_mode_switching (void)
+	       an extra check in make_preds_opaque.  We also
+	       need this to avoid confusing pre_edge_lcm when
+	       antic is cleared but transp and comp are set.  */
+-	    for (i = 0; i < no_mode; i++)
+-	      clear_mode_bit (transp[bb->index], j, i);
++	    bitmap_clear_bit (transp_all, bb->index);
+ 
+	    /* Insert a fake computing definition of MODE into entry
+	       blocks which compute no mode.  This represents the mode on
+@@ -687,6 +687,9 @@ optimize_mode_switching (void)
+ 
+      FOR_EACH_BB_FN (bb, cfun)
+	{
++	  if (!bitmap_bit_p (transp_all, bb->index))
++	    clear_mode_bit (transp[bb->index], j, m);
++
+	  if (info[bb->index].seginfo->mode == m)
+	    set_mode_bit (antic[bb->index], j, m);
+ 
+-- 
+2.33.0
+
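The restructuring rests on the observation that transparency is per-block, not per-(block, mode), so a one-bit-per-block set can be maintained cheaply during the scan and expanded once at the end. A standard-library approximation of the before/after shape, with std::vector<bool> standing in for GCC's sbitmap and arbitrary sizes:

#include <cstdio>
#include <vector>

int main ()
{
  const int n_blocks = 4, no_mode = 3;
  std::vector<bool> transp_all (n_blocks, true);      // one bit per block
  std::vector<std::vector<bool>> transp
    (n_blocks, std::vector<bool> (no_mode, true));    // bit per (bb, mode)

  transp_all[2] = false;  // block 2 sets the entity's mode somewhere

  // One pass at the end replaces a clear-all-modes loop at every site.
  for (int bb = 0; bb < n_blocks; ++bb)
    if (!transp_all[bb])
      for (int m = 0; m < no_mode; ++m)
        transp[bb][m] = false;

  std::printf ("block 2, mode 0 transparent? %d\n", (int) transp[2][0]);
}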
View file
_service:tar_scm:0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch
Added
@@ -0,0 +1,337 @@
+From 868f56db1101bf679f1b2510b9934a978f503a1e Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 5 Feb 2024 16:53:01 +0800
+Subject: [PATCH 136/188] LoongArch: Remove redundant symbol type conversions
+ in larchintrin.h.
+
+gcc/ChangeLog:
+
+	* config/loongarch/larchintrin.h (__movgr2fcsr): Remove redundant
+	symbol type conversions.
+	(__cacop_d): Likewise.
+	(__cpucfg): Likewise.
+	(__asrtle_d): Likewise.
+	(__asrtgt_d): Likewise.
+	(__lddir_d): Likewise.
+	(__ldpte_d): Likewise.
+	(__crc_w_b_w): Likewise.
+	(__crc_w_h_w): Likewise.
+	(__crc_w_w_w): Likewise.
+	(__crc_w_d_w): Likewise.
+	(__crcc_w_b_w): Likewise.
+	(__crcc_w_h_w): Likewise.
+	(__crcc_w_w_w): Likewise.
+	(__crcc_w_d_w): Likewise.
+	(__csrrd_w): Likewise.
+	(__csrwr_w): Likewise.
+	(__csrxchg_w): Likewise.
+	(__csrrd_d): Likewise.
+	(__csrwr_d): Likewise.
+	(__csrxchg_d): Likewise.
+	(__iocsrrd_b): Likewise.
+	(__iocsrrd_h): Likewise.
+	(__iocsrrd_w): Likewise.
+	(__iocsrrd_d): Likewise.
+	(__iocsrwr_b): Likewise.
+	(__iocsrwr_h): Likewise.
+	(__iocsrwr_w): Likewise.
+	(__iocsrwr_d): Likewise.
+	(__frecipe_s): Likewise.
+	(__frecipe_d): Likewise.
+	(__frsqrte_s): Likewise.
+	(__frsqrte_d): Likewise.
+---
+ gcc/config/loongarch/larchintrin.h | 69 ++++++++++++++----------------
+ 1 file changed, 33 insertions(+), 36 deletions(-)
+
+diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
+index 046e042fd..2e94e5612 100644
+--- a/gcc/config/loongarch/larchintrin.h
++++ b/gcc/config/loongarch/larchintrin.h
+@@ -87,13 +87,13 @@ __rdtimel_w (void)
+ /* Assembly instruction format:	fcsr, rj.  */
+ /* Data types in instruction templates:  VOID, UQI, USI.  */
+ #define __movgr2fcsr(/*ui5*/ _1, _2) \
+-  __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2);
++  __builtin_loongarch_movgr2fcsr ((_1), _2);
+ 
+ #if defined __loongarch64
+ /* Assembly instruction format:	ui5, rj, si12.  */
+ /* Data types in instruction templates:  VOID, USI, UDI, SI.  */
+ #define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \
+-  ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3)))
++  __builtin_loongarch_cacop_d ((_1), (_2), (_3))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -104,7 +104,7 @@ extern __inline unsigned int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __cpucfg (unsigned int _1)
+ {
+-  return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1);
++  return __builtin_loongarch_cpucfg (_1);
+ }
+ 
+ #ifdef __loongarch64
+@@ -114,7 +114,7 @@ extern __inline void
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __asrtle_d (long int _1, long int _2)
+ {
+-  __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2);
++  __builtin_loongarch_asrtle_d (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rj, rk.  */
+@@ -123,7 +123,7 @@ extern __inline void
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __asrtgt_d (long int _1, long int _2)
+ {
+-  __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2);
++  __builtin_loongarch_asrtgt_d (_1, _2);
+ }
+ #endif
+ 
+@@ -131,7 +131,7 @@ __asrtgt_d (long int _1, long int _2)
+ /* Assembly instruction format:	rd, rj, ui5.  */
+ /* Data types in instruction templates:  DI, DI, UQI.  */
+ #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \
+-  ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2)))
++  __builtin_loongarch_lddir_d ((_1), (_2))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -140,7 +140,7 @@ __asrtgt_d (long int _1, long int _2)
+ /* Assembly instruction format:	rj, ui5.  */
+ /* Data types in instruction templates:  VOID, DI, UQI.  */
+ #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \
+-  ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2)))
++  __builtin_loongarch_ldpte_d ((_1), (_2))
+ #else
+ #error "Unsupported ABI."
+ #endif
+@@ -151,7 +151,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_b_w (char _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2);
++  return __builtin_loongarch_crc_w_b_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -160,7 +160,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_h_w (short _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2);
++  return __builtin_loongarch_crc_w_h_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -169,7 +169,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_w_w (int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2);
++  return __builtin_loongarch_crc_w_w_w (_1, _2);
+ }
+ 
+ #ifdef __loongarch64
+@@ -179,7 +179,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crc_w_d_w (long int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2);
++  return __builtin_loongarch_crc_w_d_w (_1, _2);
+ }
+ #endif
+ 
+@@ -189,7 +189,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_b_w (char _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_b_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -198,7 +198,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_h_w (short _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_h_w (_1, _2);
+ }
+ 
+ /* Assembly instruction format:	rd, rj, rk.  */
+@@ -207,7 +207,7 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_w_w (int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_w_w (_1, _2);
+ }
+ 
+ #ifdef __loongarch64
+@@ -217,44 +217,41 @@ extern __inline int
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __crcc_w_d_w (long int _1, int _2)
+ {
+-  return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2);
++  return __builtin_loongarch_crcc_w_d_w (_1, _2);
+ }
+ #endif
+ 
+ /* Assembly instruction format:	rd, ui14.  */
+ /* Data types in instruction templates:  USI, USI.  */
+ #define __csrrd_w(/*ui14*/ _1) \
+-  ((unsigned int) __builtin_loongarch_csrrd_w ((_1)))
++  __builtin_loongarch_csrrd_w ((_1))
+ 
+ /* Assembly instruction format:	rd, ui14.  */
+ /* Data types in instruction templates:  USI, USI, USI.  */
+ #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \
+-  ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2)))
++  __builtin_loongarch_csrwr_w ((_1), (_2))
+ 
+ /* Assembly instruction format:	rd, rj, ui14.  */
+ /* Data types in instruction templates:  USI, USI, USI, USI.  */
+ #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \
+-  ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \
View file
_service:tar_scm:0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
Added
@@ -0,0 +1,92 @@
+From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:57 +0000
+Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit
+ handling
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e
+
+An entity isn't transparent in a block that requires a specific mode.
+optimize_mode_switching took that into account for normal insns,
+but didn't for the exit block.  Later patches misbehaved because
+of this.
+
+In contrast, an entity was correctly marked as non-transparent
+in the entry block, but the reasoning seemed a bit convoluted.
+It also referred to a function that no longer exists.
+Since KILL = ~TRANSP, the entity is by definition not transparent
+in a block that defines the entity, so I think we can make it so
+without comment.
+
+Finally, the exit handling was nested in the entry handling,
+but that doesn't seem necessary.  A target could say that an
+entity is undefined on entry but must be defined on return,
+on a "be liberal in what you accept, be conservative in what
+you do" principle.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Mark the exit
+	block as nontransparent if it requires a specific mode.
+	Handle the entry and exit mode as sibling rather than nested
+	concepts.  Remove outdated comment.
+---
+ gcc/mode-switching.cc | 34 +++++++++++++++++-------------------
+ 1 file changed, 15 insertions(+), 19 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4d2b9e284..4761c2ff0 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -649,34 +649,30 @@ optimize_mode_switching (void)
+	}
+      if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+	{
+-	  int mode = targetm.mode_switching.entry (e);
+-
+	  info[post_entry->index].mode_out =
+	    info[post_entry->index].mode_in = no_mode;
+-	  if (pre_exit)
+-	    {
+-	      info[pre_exit->index].mode_out =
+-		info[pre_exit->index].mode_in = no_mode;
+-	    }
+ 
++	  int mode = targetm.mode_switching.entry (e);
+	  if (mode != no_mode)
+	    {
+-	      bb = post_entry;
+-
+-	      /* By always making this nontransparent, we save
+-		 an extra check in make_preds_opaque.  We also
+-		 need this to avoid confusing pre_edge_lcm when
+-		 antic is cleared but transp and comp are set.  */
+-	      bitmap_clear_bit (transp_all, bb->index);
+-
+	      /* Insert a fake computing definition of MODE into entry
+		 blocks which compute no mode.  This represents the mode on
+		 entry.  */
+-	      info[bb->index].computing = mode;
++	      info[post_entry->index].computing = mode;
++	      bitmap_clear_bit (transp_all, post_entry->index);
++	    }
+ 
+-	      if (pre_exit)
+-		info[pre_exit->index].seginfo->mode =
+-		  targetm.mode_switching.exit (e);
++	  if (pre_exit)
++	    {
++	      info[pre_exit->index].mode_out =
++		info[pre_exit->index].mode_in = no_mode;
++
++	      int mode = targetm.mode_switching.exit (e);
++	      if (mode != no_mode)
++		{
++		  info[pre_exit->index].seginfo->mode = mode;
++		  bitmap_clear_bit (transp_all, pre_exit->index);
++		}
+	    }
+	}
+ 
+-- 
+2.33.0
+
View file
_service:tar_scm:0137-LoongArch-When-checking-whether-the-assembler-suppor.patch
Added
@@ -0,0 +1,54 @@
+From 3580ce2b8c57967117e55af48beba0aaa6257e8b Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 21 Feb 2024 11:17:14 +0800
+Subject: [PATCH 137/188] LoongArch: When checking whether the assembler
+ supports conditional branch relaxation, add compilation parameter
+ "--fatal-warnings" to the assembler.
+
+In binutils 2.40 and earlier versions, only a warning will be reported
+when a relocation immediate value is out of bounds.  As a result,
+the value of the macro HAVE_AS_COND_BRANCH_RELAXATION will also be
+defined as 1 when the assembler does not support conditional branch
+relaxation.  Therefore, add the compilation option "--fatal-warnings"
+to avoid this problem.
+
+gcc/ChangeLog:
+
+	* configure: Regenerate.
+	* configure.ac: Add the parameter "--fatal-warnings" to the
+	assembler when checking whether it supports conditional
+	branch relaxation.
+---
+ gcc/configure    | 2 +-
+ gcc/configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index eecfe60d6..f31395017 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28947,7 +28947,7 @@ else
+     nop
+     .endr
+   beq $a0,$a1,a' > conftest.s
+-  if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
++  if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o conftest.s >&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index d1032440d..35f2c657f 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5349,7 +5349,7 @@ x:
+	     [Define if your assembler supports -mrelax option.])])
+ gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
+   gcc_cv_as_loongarch_cond_branch_relax,
+-  [],
++  [--fatal-warnings],
+   [a:
+   .rept 32769
+   nop
+-- 
+2.43.0
+
View file
_service:tar_scm:0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
Added
@@ -0,0 +1,93 @@
+From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:57 +0000
+Subject: [PATCH 039/157] [Backport][SME] mode-switching: Allow targets to set
+ the mode for EH handlers
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa
+
+The mode-switching pass already had hooks to say what mode
+an entity is in on entry to a function and what mode it must
+be in on return.  For SME, we also want to say what mode an
+entity is guaranteed to be in on entry to an exception handler.
+
+gcc/
+	* target.def (mode_switching.eh_handler): New hook.
+	* doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook.
+	* doc/tm.texi: Regenerate.
+	* mode-switching.cc (optimize_mode_switching): Use eh_handler
+	to get the mode on entry to an exception handler.
+---
+ gcc/doc/tm.texi       | 6 ++++++
+ gcc/doc/tm.texi.in    | 2 ++
+ gcc/mode-switching.cc | 5 ++++-
+ gcc/target.def        | 7 +++++++
+ 4 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 553aa4cf2..4788b3f7a 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
+ must be defined.
+ @end deftypefn
+ 
++@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity})
++If this hook is defined, it should return the mode that @var{entity} is
++guaranteed to be in on entry to an exception handler, or the number of modes
++if there is no such guarantee.
++@end deftypefn
++
+ @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
+ This hook specifies the order in which modes for @var{entity}
+ are processed.  0 is the highest priority,
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index 9ec11b15c..ad343504f 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context.
+ 
+ @hook TARGET_MODE_EXIT
+ 
++@hook TARGET_MODE_EH_HANDLER
++
+ @hook TARGET_MODE_PRIORITY
+ 
+ @node Target Attributes
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4761c2ff0..9a6ba6cca 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -596,7 +596,10 @@ optimize_mode_switching (void)
+	      gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
+	      if (ins_pos != BB_END (bb))
+		ins_pos = NEXT_INSN (ins_pos);
+-	      ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
++	      if (bb_has_eh_pred (bb)
++		  && targetm.mode_switching.eh_handler)
++		last_mode = targetm.mode_switching.eh_handler (e);
++	      ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+	      bitmap_clear_bit (transp_all, bb->index);
+diff --git a/gcc/target.def b/gcc/target.def
+index b87b0f927..bbb482de6 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
+ must be defined.",
+  int, (int entity), NULL)
+ 
++DEFHOOK
++(eh_handler,
++ "If this hook is defined, it should return the mode that @var{entity} is\n\
++guaranteed to be in on entry to an exception handler, or the number of modes\n\
++if there is no such guarantee.",
++ int, (int entity), NULL)
++
+ DEFHOOK
+ (priority,
+ "This hook specifies the order in which modes for @var{entity}\n\
+-- 
+2.33.0
+
View file
_service:tar_scm:0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch
Added
@@ -0,0 +1,49 @@
+From e6968eb62b2a0adc7ef591594240582630adfc61 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 21 Feb 2024 23:54:53 +0800
+Subject: [PATCH 138/188] LoongArch: Don't falsely claim gold supported in
+ toplevel configure
+
+The gold linker has never been ported to LoongArch (and it seems
+unlikely to be ported in the future as the new architectures are
+focusing on lld and/or mold for fast linkers).
+
+ChangeLog:
+
+	* configure.ac (ENABLE_GOLD): Remove loongarch*-*-* from target
+	list.
+	* configure: Regenerate.
+---
+ configure    | 2 +-
+ configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/configure b/configure
+index 81b4a3cec..ebdca8c62 100755
+--- a/configure
++++ b/configure
+@@ -3058,7 +3058,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+	configdirs="$configdirs gold"
+	if test x${ENABLE_GOLD} = xdefault; then
+	  default_ld=gold
+diff --git a/configure.ac b/configure.ac
+index 9f8dbd319..4f45fd2ba 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -353,7 +353,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+	configdirs="$configdirs gold"
+	if test x${ENABLE_GOLD} = xdefault; then
+	  default_ld=gold
+-- 
+2.43.0
+
View file
_service:tar_scm:0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
Added
@@ -0,0 +1,211 @@ +From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: PATCH 040/157 BackportSME mode-switching: Pass set of live + registers to the needed hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218 + +The emit hook already takes the set of live hard registers as input. +This patch passes it to the needed hook too. SME uses this to +optimise the mode choice based on whether state is live or dead. + +The main caller already had access to the required info, but the +special handling of return values did not. + +gcc/ + * target.def (mode_switching.needed): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + * config/epiphany/mode-switch-use.cc (insert_uses): Likewise. + * config/i386/i386.cc (ix86_mode_needed): Likewise. + * config/riscv/riscv.cc (riscv_mode_needed): Likewise. + * config/sh/sh.cc (sh_mode_needed): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. + (create_pre_exit): Likewise, using the DF simulate functions + to calculate the required information. +--- + gcc/config/epiphany/epiphany-protos.h | 4 +++- + gcc/config/epiphany/epiphany.cc | 2 +- + gcc/config/epiphany/mode-switch-use.cc | 2 +- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 4 ++-- + gcc/doc/tm.texi | 5 +++-- + gcc/mode-switching.cc | 14 ++++++++++++-- + gcc/target.def | 5 +++-- + 8 files changed, 26 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index 61b63234e..d463e5483 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode, + #endif + extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); +-extern int epiphany_mode_needed (int entity, rtx_insn *insn); ++#ifdef HARD_CONST ++extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++#endif + extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index f8c049340..be0fbc68c 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority) + } + + int +-epiphany_mode_needed (int entity, rtx_insn *insn) ++epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + enum attr_fp_mode mode; + +diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc +index 887550a33..cacb1ce5a 100644 +--- a/gcc/config/epiphany/mode-switch-use.cc ++++ b/gcc/config/epiphany/mode-switch-use.cc +@@ -58,7 +58,7 @@ insert_uses (void) + { + if (!INSN_P (insn)) + continue; +- mode = epiphany_mode_needed (e, insn); ++ mode = epiphany_mode_needed (e, insn, {}); + if (mode == no_mode) + continue; + if (target_insert_mode_switch_use) +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 60f3296b0..4d591d217 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ 
-14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) + prior to the execution of insn. */ + + static int +-ix86_mode_needed (int entity, rtx_insn *insn) ++ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 03e1c04ec..85e83e12e 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *); + static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); +-static int sh_mode_needed (int, rtx_insn *); ++static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); + static int sh_mode_after (int, int, rtx_insn *); + static int sh_mode_entry (int); + static int sh_mode_exit (int); +@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, + } + + static int +-sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) ++sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + { + return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE; + } +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 4788b3f7a..d8ac6c4d6 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before + sets of a higher numbered entity to a mode of the same or lower priority. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook + to return the mode that @var{entity} must be switched into prior to the + execution of @var{insn}, or the number of modes if @var{insn} has no +-such requirement. ++such requirement. @var{regs_live} contains the set of hard registers ++that are live before @var{insn}. 
+ @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 9a6ba6cca..6bbda5058 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + && GET_CODE (PATTERN (last_insn)) == USE + && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG) + { ++ auto_bitmap live; ++ df_simulate_initialize_backwards (src_bb, live); ++ + int ret_start = REGNO (ret_reg); + int nregs = REG_NREGS (ret_reg); + int ret_end = ret_start + nregs; +@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + bool forced_late_switch = false; + rtx_insn *before_return_copy; + ++ df_simulate_one_insn_backwards (src_bb, last_insn, live); ++ + do + { + rtx_insn *return_copy = PREV_INSN (last_insn); +@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + int copy_start, copy_num; + int j; + ++ df_simulate_one_insn_backwards (src_bb, return_copy, live); ++ + if (NONDEBUG_INSN_P (return_copy)) + { + /* When using SJLJ exceptions, the call to the +@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + the case for floating point on SH4 - then it might + be set by an arithmetic operation that needs a + different mode than the exit block. */ ++ HARD_REG_SET hard_regs_live; ++ REG_SET_TO_HARD_REG_SET (hard_regs_live, live); + for (j = n_entities - 1; j >= 0; j--) + { + int e = entity_map[j]; + int mode = +- targetm.mode_switching.needed (e, return_copy); ++ targetm.mode_switching.needed (e, return_copy, ++ hard_regs_live); + + if (mode != num_modes[e] + && mode != targetm.mode_switching.exit (e)) +@@ -609,7 +619,7 @@ optimize_mode_switching (void) + { + if (INSN_P (insn)) + { +- int mode = targetm.mode_switching.needed (e, insn); ++ int mode = targetm.mode_switching.needed (e, insn, live_now); + rtx link; + + if (mode != no_mode && mode != last_mode) +diff --git a/gcc/target.def b/gcc/target.def +index bbb482de6..06a52bdaf 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7003,8 +7003,9 @@ DEFHOOK + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ + to return the mode that @var{entity} must be switched into prior to the\n\
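For orientation, a minimal sketch of how a target hook might use the new regs_live parameter (hypothetical entity, modes and register number; this fragment only compiles inside GCC and is not part of the patch):

/* Illustrative modes and state register for a made-up entity.  */
enum { MODE_OFF, MODE_ON, N_MODES };
#define STATE_REGNUM 1

static int
example_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn,
		     HARD_REG_SET regs_live)
{
  if (!INSN_P (insn))
    return N_MODES;	/* number of modes: no requirement */
  /* Only require the expensive mode while the state register is live.  */
  return TEST_HARD_REG_BIT (regs_live, STATE_REGNUM) ? MODE_ON : MODE_OFF;
}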
View file
_service:tar_scm:0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch
Added
@@ -0,0 +1,56 @@ +From 7a4761a31454f999331e8aa5f831e26e249c4295 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 25 Feb 2024 20:40:41 +0800 +Subject: [PATCH 139/188] LoongArch: NFC: Deduplicate crc instruction defines + +Introduce an iterator for UNSPEC_CRC and UNSPEC_CRCC to make the next +change easier. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (CRC): New define_int_iterator. + (crc): New define_int_attr. + (loongarch_crc_w_<size>_w, loongarch_crcc_w_<size>_w): Unify + into ... + (loongarch_<crc>_w_<size>_w): ... here. +--- + gcc/config/loongarch/loongarch.md | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9356194fe..b5ad9eada 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4251,24 +4251,16 @@ + + + (define_mode_iterator QHSD [QI HI SI DI]) ++(define_int_iterator CRC [UNSPEC_CRC UNSPEC_CRCC]) ++(define_int_attr crc [(UNSPEC_CRC "crc") (UNSPEC_CRCC "crcc")]) + +-(define_insn "loongarch_crc_w_<size>_w" ++(define_insn "loongarch_<crc>_w_<size>_w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRC))] ++ CRC))] + "" +- "crc.w.<size>.w\t%0,%1,%2" +- [(set_attr "type" "unknown") +- (set_attr "mode" "<MODE>")]) +- +-(define_insn "loongarch_crcc_w_<size>_w" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRCC))] +- "" +- "crcc.w.<size>.w\t%0,%1,%2" ++ "<crc>.w.<size>.w\t%0,%1,%2" + [(set_attr "type" "unknown") + (set_attr "mode" "<MODE>")]) + +-- +2.43.0 +
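As a quick sanity check that the change is indeed NFC, both intrinsics still expand, now through the single iterator-generated pattern (a compile-only sketch; builtin names are the ones defined by the LoongArch port):

/* Expands to crc.w.w.w followed by crcc.w.w.w, both matching
   loongarch_<crc>_w_<size>_w with <crc> = crc / crcc.  */
int
checksum_step (int word, int crc)
{
  int c = __builtin_loongarch_crc_w_w_w (word, crc);
  return __builtin_loongarch_crcc_w_w_w (word, c);
}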
View file
_service:tar_scm:0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
Added
@@ -0,0 +1,177 @@ +From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: [PATCH 041/157] [Backport][SME] mode-switching: Pass the set of live + registers to the after hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d + +This patch passes the set of live hard registers to the after hook, +like the previous one did for the needed hook. + +gcc/ + * target.def (mode_switching.after): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_after): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + (epiphany_mode_after): Likewise. + * config/i386/i386.cc (ix86_mode_after): Likewise. + * config/riscv/riscv.cc (riscv_mode_after): Likewise. + * config/sh/sh.cc (sh_mode_after): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. +--- + gcc/config/epiphany/epiphany-protos.h | 3 ++- + gcc/config/epiphany/epiphany.cc | 5 +++-- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 5 +++-- + gcc/doc/tm.texi | 4 +++- + gcc/mode-switching.cc | 8 ++++---- + gcc/target.def | 4 +++- + 7 files changed, 19 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index d463e5483..6326b7e80 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); + #ifdef HARD_CONST + extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET); + #endif +-extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); + extern bool epiphany_is_interrupt_p (tree); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index be0fbc68c..62636b1ec 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + return 2; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) +- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); ++ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {}); + /* Fall through. */ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: +@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit) + } + + int +-epiphany_mode_after (int entity, int last_mode, rtx_insn *insn) ++epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET) + { + /* We have too few call-saved registers to hope to keep the masks across + calls. */ +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 4d591d217..593185fa6 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) + /* Return the mode that an insn results in.
*/ + + static int +-ix86_mode_after (int entity, int mode, rtx_insn *insn) ++ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 85e83e12e..74d61c43b 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); + static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); +-static int sh_mode_after (int, int, rtx_insn *); ++static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET); + static int sh_mode_entry (int); + static int sh_mode_exit (int); + static int sh_mode_priority (int entity, int n); +@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + } + + static int +-sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) ++sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn, ++ HARD_REG_SET) + { + if (TARGET_HITACHI && recog_memoized (insn) >= 0 && + get_attr_fp_set (insn) != FP_SET_NONE) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d8ac6c4d6..7fce485b2 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10289,12 +10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers + that are live before @var{insn}. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If this hook is defined, it is evaluated for every @var{insn} during mode + switching. It returns the mode that @var{entity} is in after @var{insn} + has been executed. @var{mode} is the mode that @var{entity} was in + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. ++@var{regs_live} is the set of hard registers that are live after @var{insn} ++has been executed. + + @var{mode} is equal to the number of modes defined for @var{entity} + if the mode before @var{insn} is unknown. The hook should likewise return +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6bbda5058..4f0445894 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -631,10 +631,6 @@ optimize_mode_switching (void) + last_mode = mode; + } + +- if (targetm.mode_switching.after) +- last_mode = targetm.mode_switching.after (e, last_mode, +- insn); +- + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD) +@@ -644,6 +640,10 @@ optimize_mode_switching (void) + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), &live_now); ++ ++ if (targetm.mode_switching.after) ++ last_mode = targetm.mode_switching.after (e, last_mode, ++ insn, live_now); + } + } + +diff --git a/gcc/target.def b/gcc/target.def +index 06a52bdaf..67c20bbb0 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\ + switching. It returns the mode that @var{entity} is in after @var{insn}\n\ + has been executed. 
@var{mode} is the mode that @var{entity} was in\n\ + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ ++@var{regs_live} is the set of hard registers that are live after @var{insn}\n\ ++has been executed.\n\ + \n\ + @var{mode} is equal to the number of modes defined for @var{entity}\n\ + if the mode before @var{insn} is unknown. The hook should likewise return\n\ +@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\ + @var{insn}.\n\ + \n\ + Not defining the hook is equivalent to returning @var{mode}.", +- int, (int entity, int mode, rtx_insn *insn), NULL) ++ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL) + + DEFHOOK + (entry, +-- +2.33.0 +
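A matching sketch for the after hook, using the same hypothetical names as the needed-hook sketch earlier (illustrative only; compiles only inside GCC):

static int
example_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn,
		    HARD_REG_SET regs_live)
{
  /* Once the state register is dead after INSN, the entity can be
     treated as switched off again.  */
  if (INSN_P (insn) && !TEST_HARD_REG_BIT (regs_live, STATE_REGNUM))
    return MODE_OFF;
  return mode;		/* equivalent to not defining the hook */
}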
View file
_service:tar_scm:0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch
Added
@@ -0,0 +1,70 @@ +From 946f9153a5d813301b05fb56a75e2c7ce22a6c2a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 25 Feb 2024 20:44:34 +0800 +Subject: [PATCH 140/188] LoongArch: Remove unneeded sign extension after + crc/crcc instructions + +The specification of crc/crcc instructions is clear that the output is +sign-extended to GRLEN. Add a define_insn to tell the compiler this +fact and allow it to remove the unneeded sign extension on crc/crcc +output. As crc/crcc instructions are usually used in a tight loop, +this should produce a significant performance gain. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md + (loongarch_<crc>_w_<size>_w_extended): New define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/crc-sext.c: New test. +--- + gcc/config/loongarch/loongarch.md | 11 +++++++++++ + gcc/testsuite/gcc.target/loongarch/crc-sext.c | 13 +++++++++++++ + 2 files changed, 24 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/crc-sext.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b5ad9eada..248ad12bb 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4264,6 +4264,17 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "<MODE>")]) + ++(define_insn "loongarch_<crc>_w_<size>_w_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] ++ CRC)))] ++ "TARGET_64BIT" ++ "<crc>.w.<size>.w\t%0,%1,%2" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "<MODE>")]) ++ + ;; With normal or medium code models, if the only use of a pc-relative + ;; address is for loading or storing a value, then relying on linker + ;; relaxation is not better than emitting the machine instruction directly. +diff --git a/gcc/testsuite/gcc.target/loongarch/crc-sext.c b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +new file mode 100644 +index 000000000..9ade5a8e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**my_crc: ++** crc.w.d.w \$r4,\$r4,\$r5 ++** jr \$r1 ++*/ ++int my_crc(long long dword, int crc) ++{ ++ return __builtin_loongarch_crc_w_d_w(dword, crc); ++} +-- +2.43.0 +
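The effect is easiest to see on a caller that widens the 32-bit result; under the new pattern the widening should fold into the instruction itself (a hedged illustration of the commit's claim, compile at -O2 on a 64-bit target):

/* Previously the implicit SI -> DI sign extension here cost an extra
   instruction; now combine matches loongarch_<crc>_w_<size>_w_extended
   and crc.w.w.w alone should suffice.  */
long long
crc_widen (int word, int crc)
{
  return __builtin_loongarch_crc_w_w_w (word, crc);
}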
View file
_service:tar_scm:0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
Added
@@ -0,0 +1,56 @@ +From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:59 +0000 +Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux + fields + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9 + +The pass used the edge aux field to record which mode change +should happen on the edge, with -1 meaning "none". It's more +convenient for later patches to leave aux zero for "none", +and use numbers based at 1 to record a change. + +gcc/ + * mode-switching.cc (commit_mode_sets): Use 1-based edge aux values. +--- + gcc/mode-switching.cc | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 4f0445894..89a8494c6 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) + for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--) + { + edge eg = INDEX_EDGE (edge_list, ed); +- int mode; + +- if ((mode = (int)(intptr_t)(eg->aux)) != -1) ++ if (eg->aux) + { ++ int mode = (int) (intptr_t) eg->aux - 1; + HARD_REG_SET live_at_edge; + basic_block src_bb = eg->src; + int cur_mode = info[src_bb->index].mode_out; +@@ -727,14 +727,14 @@ optimize_mode_switching (void) + { + edge eg = INDEX_EDGE (edge_list, ed); + +- eg->aux = (void *)(intptr_t)-1; ++ eg->aux = (void *) (intptr_t) 0; + + for (i = 0; i < no_mode; i++) + { + int m = targetm.mode_switching.priority (entity_map[j], i); + if (mode_bit_p (inserted, j, m)) + { +- eg->aux = (void *)(intptr_t)m; ++ eg->aux = (void *) (intptr_t) (m + 1); + break; + } + } +-- +2.33.0 +
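Restated as a pair of helpers, the new encoding is (illustrative only, not code from the patch):

#include <stdint.h>

/* aux == 0 means "no mode change on this edge";
   aux == m + 1 schedules a change to mode m.  */
static void *
encode_edge_mode (int m)
{
  return (void *) (intptr_t) (m + 1);
}

static int
decode_edge_mode (void *aux)	/* only meaningful when aux != NULL */
{
  return (int) (intptr_t) aux - 1;
}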
View file
_service:tar_scm:0141-LoongArch-Allow-s9-as-a-register-alias.patch
Added
@@ -0,0 +1,45 @@ +From a74a85ed5f5b00018553d614b4dc57eb1dd5f5ee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 25 Jan 2024 23:49:13 +0800 +Subject: [PATCH 141/188] LoongArch: Allow s9 as a register alias + +The psABI allows using s9 as an alias of r22. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (ADDITIONAL_REGISTER_NAMES): Add + s9 as an alias of r22. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/regname-fp-s9.c: New test. +--- + gcc/config/loongarch/loongarch.h | 1 + + gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 3 +++ + 2 files changed, 4 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 15261fdc0..8bcdb8729 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -931,6 +931,7 @@ typedef struct { + { "t8", 20 + GP_REG_FIRST }, \ + { "x", 21 + GP_REG_FIRST }, \ + { "fp", 22 + GP_REG_FIRST }, \ ++ { "s9", 22 + GP_REG_FIRST }, \ + { "s0", 23 + GP_REG_FIRST }, \ + { "s1", 24 + GP_REG_FIRST }, \ + { "s2", 25 + GP_REG_FIRST }, \ +diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +new file mode 100644 +index 000000000..d2e3b80f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +@@ -0,0 +1,3 @@ ++/* { dg-do compile } */ ++register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */ ++register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */ +-- +2.43.0 +
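A one-line compile-only check of the new spelling (mirrors the added test; hedged, since the exact diagnostics depend on the surrounding code):

/* Accepted after this patch: "s9" binds to r22, the same register
   that "fp" names.  */
register long frame_base __asm__ ("s9");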
View file
_service:tar_scm:0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
Added
@@ -0,0 +1,337 @@ +From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:59 +0000 +Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a + target-configurable confluence operator + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27 + +The mode-switching pass assumed that all of an entity's modes +were mutually exclusive. However, the upcoming SME changes +have an entity with some overlapping modes, so that there is +sometimes a "superunion" mode that contains two given modes. +We can use this relationship to pass something more helpful than +"don't know" to the emit hook. + +This patch adds a new hook that targets can use to specify +a mode confluence operator. + +With mutually exclusive modes, it's possible to compute a block's +incoming and outgoing modes by looking at its availability sets. +With the confluence operator, we instead need to solve a full +dataflow problem. + +However, when emitting a mode transition, the upcoming SME use of +mode-switching benefits from having as much information as possible +about the starting mode. Calculating this information is definitely +worth the compile time. + +The dataflow problem is written to work before and after the LCM +problem has been solved. A later patch makes use of this. + +While there (since git blame would ping me for the reindented code), +I used a lambda to avoid the cut-&-pasted loops. + +gcc/ + * target.def (mode_switching.confluence): New hook. + * doc/tm.texi (TARGET_MODE_CONFLUENCE): New @hook. + * doc/tm.texi.in: Regenerate. + * mode-switching.cc (confluence_info): New variable. + (mode_confluence, forward_confluence_n, forward_transfer): New + functions. + (optimize_mode_switching): Use them to calculate mode_in when + TARGET_MODE_CONFLUENCE is defined. +--- + gcc/doc/tm.texi | 16 ++++ + gcc/doc/tm.texi.in | 2 + + gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++------- + gcc/target.def | 17 ++++ + 4 files changed, 186 insertions(+), 28 deletions(-) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 7fce485b2..d7053ec9e 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after + Not defining the hook is equivalent to returning @var{mode}. + @end deftypefn + ++@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2}) ++By default, the mode-switching pass assumes that a given entity's modes ++are mutually exclusive. This means that the pass can only tell ++@code{TARGET_MODE_EMIT} about an entity's previous mode if all ++incoming paths of execution leave the entity in the same state. ++ ++However, some entities might have overlapping, non-exclusive modes, ++so that it is sometimes possible to represent ``mode @var{mode1} or mode ++@var{mode2}'' with something more specific than ``mode not known''. ++If this is true for at least one entity, you should define this hook ++and make it return a mode that includes @var{mode1} and @var{mode2} ++as possibilities. (The mode can include other possibilities too.) ++The hook should return the number of modes if no suitable mode exists ++for the given arguments. ++@end deftypefn ++ + @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) + If this hook is defined, it is evaluated for every @var{entity} that + needs mode switching.
It should return the mode that @var{entity} is +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index ad343504f..d420e62fd 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context. + + @hook TARGET_MODE_AFTER + ++@hook TARGET_MODE_CONFLUENCE ++ + @hook TARGET_MODE_ENTRY + + @hook TARGET_MODE_EXIT +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 89a8494c6..065767902 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + return pre_exit; + } + ++/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY, ++ using NO_MODE to represent an unknown mode if nothing more precise ++ is available. */ ++ ++int ++mode_confluence (int entity, int mode1, int mode2, int no_mode) ++{ ++ if (mode1 == mode2) ++ return mode1; ++ ++ if (mode1 != no_mode ++ && mode2 != no_mode ++ && targetm.mode_switching.confluence) ++ return targetm.mode_switching.confluence (entity, mode1, mode2); ++ ++ return no_mode; ++} ++ ++/* Information for the dataflow problems below. */ ++struct ++{ ++ /* Information about each basic block, indexed by block id. */ ++ struct bb_info *bb_info; ++ ++ /* The entity that we're processing. */ ++ int entity; ++ ++ /* The number of modes defined for the entity, and thus the identifier ++ of the "don't know" mode. */ ++ int no_mode; ++} confluence_info; ++ ++/* Propagate information about any mode change on edge E to the ++ destination block's mode_in. Return true if something changed. ++ ++ The mode_in and mode_out fields use no_mode + 1 to mean "not yet set". */ ++ ++static bool ++forward_confluence_n (edge e) ++{ ++ /* The entry and exit blocks have no useful mode information. */ ++ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK) ++ return false; ++ ++ /* We don't control mode changes across abnormal edges. */ ++ if (e->flags & EDGE_ABNORMAL) ++ return false; ++ ++ /* E->aux is nonzero if we have computed the LCM problem and scheduled ++ E to change the mode to E->aux - 1. Otherwise model the change ++ from the source to the destination. */ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ int no_mode = confluence_info.no_mode; ++ int src_mode = bb_info[e->src->index].mode_out; ++ if (e->aux) ++ src_mode = (int) (intptr_t) e->aux - 1; ++ if (src_mode == no_mode + 1) ++ return false; ++ ++ int dest_mode = bb_info[e->dest->index].mode_in; ++ if (dest_mode == no_mode + 1) ++ { ++ bb_info[e->dest->index].mode_in = src_mode; ++ return true; ++ } ++ ++ int entity = confluence_info.entity; ++ int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode); ++ if (dest_mode == new_mode) ++ return false; ++ ++ bb_info[e->dest->index].mode_in = new_mode; ++ return true; ++} ++ ++/* Update block BB_INDEX's mode_out based on its mode_in. Return true if ++ something changed. */ ++ ++static bool ++forward_transfer (int bb_index) ++{ ++ /* The entry and exit blocks have no useful mode information. */ ++ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK) ++ return false; ++ ++ /* Only propagate through a block if the entity is transparent.
*/ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ if (bb_info[bb_index].computing != confluence_info.no_mode ++ || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in) ++ return false; ++ ++ bb_info[bb_index].mode_out = bb_info[bb_index].mode_in; ++ return true; ++} ++ + /* Find all insns that need a particular mode setting, and insert the + necessary mode switches. Return true if we did work. */ +@@ -567,6 +662,39 @@ optimize_mode_switching (void) + + auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
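A hypothetical confluence operator, for an entity whose modes happen to be encoded as bitmasks of required features (the names and the encoding are assumptions for illustration, not part of the patch):

#define N_MODES 4	/* a power of two: two feature bits */

static int
example_mode_confluence (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* The "superunion" mode contains every feature of both inputs.  */
  int superunion = mode1 | mode2;
  return superunion < N_MODES ? superunion : N_MODES;
}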
View file
_service:tar_scm:0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch
Added
@@ -0,0 +1,1117 @@ +From d568321f8894ed270bf0011892b86baa6d6b82bd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 5 Mar 2024 20:46:57 +0800 +Subject: [PATCH 142/188] LoongArch: testsuite: Rewrite {x,}vfcmp-{d,f}.c to + avoid named registers + +Loops on named vector register are not vectorized (see comment 11 of +PR113622), so these test cases have been failing for a while. +Rewrite them using check-function-bodies to remove hard-coded register +names. A barrier is needed to always load the first operand before the +second operand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfcmp-f.c: Rewrite to avoid named + registers. + * gcc.target/loongarch/vfcmp-d.c: Likewise. + * gcc.target/loongarch/xvfcmp-f.c: Likewise. + * gcc.target/loongarch/xvfcmp-d.c: Likewise. +--- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 347 ++++++++++++++---- + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 204 ++++++++-- + 4 files changed, 816 insertions(+), 139 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +index 8b870ef38..87e4ed19e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +@@ -1,28 +1,188 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + + #define F double + #define I long long + + #include "vfcmp-f.c" + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { scan-assembler
"compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.ceq.d (\$vr0-9+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cune.d (\$vr0-9+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.slt.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sle.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.slt.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sle.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sule.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sult.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sule.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.sult.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.clt.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cle.d (\$vr0-9+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.clt.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** vfcmp.cle.d (\$vr0-9+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** vld (\$vr0-9+),\$r4,0 ++** vld (\$vr0-9+),\$r5,0 ++** 
vfcmp.cule.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0
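The barrier mentioned in the commit message is just an empty asm; a minimal sketch of the test idiom (assumed shape, simplified from the real files, which store the result through a pointer):

typedef double vec2 __attribute__ ((vector_size (16)));
typedef long long ivec2 __attribute__ ((vector_size (16)));

ivec2
compare_quiet_less (vec2 *a, vec2 *b)
{
  vec2 x = *a;
  __asm__ ("" ::: "memory");	/* keep this load ahead of the next one,
				   so \1 and \2 capture stable registers */
  vec2 y = *b;
  return (ivec2) (x < y);
}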
View file
_service:tar_scm:0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
Added
@@ -0,0 +1,483 @@ +From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:29:00 +0000 +Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84 + +This patch adds a way for targets to ask that selected mode changes +be brought forward, through a combination of: + +(1) requiring a mode in blocks where the entity was previously + transparent + +(2) pushing the transition at the head of a block onto incoming edges + +SME has two uses for this: + +- A "one-shot" entity that, for any given path of execution, + either stays off or makes exactly one transition from off to on. + This relies only on (1) above; see the hook description for more info. + + The main purpose of using mode-switching for this entity is to + shrink-wrap the code that requires it. + +- A second entity for which all transitions must be from known + modes, which is enforced using a combination of (1) and (2). + More specifically, (1) looks for edges B1->B2 for which: + + - B2 requires a specific mode and + - B1 does not guarantee a specific starting mode + + In this system, such an edge is only possible if the entity is + transparent in B1. (1) then forces B1 to require some safe common + mode. Applying this inductively means that all incoming edges are + from known modes. If different edges give different starting modes, + (2) pushes the transitions onto the edges themselves; this only + happens if the entity is not transparent in some predecessor block. + +The patch also uses the back-propagation as an excuse to do a simple +on-the-fly optimisation. + +Hopefully the comments in the patch explain things a bit better. + +gcc/ + * target.def (mode_switching.backprop): New hook. + * doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook. + * doc/tm.texi: Regenerate. + * mode-switching.cc (struct bb_info): Add single_succ. + (confluence_info): Add transp field. + (single_succ_confluence_n, single_succ_transfer): New functions. + (backprop_confluence_n, backprop_transfer): Likewise. + (optimize_mode_switching): Use them. Push mode transitions onto + a block's incoming edges, if the backprop hook requires it. +--- + gcc/doc/tm.texi | 28 +++++ + gcc/doc/tm.texi.in | 2 + + gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++ + gcc/target.def | 29 +++++ + 4 files changed, 334 insertions(+) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d7053ec9e..5f0972356 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists + for the given arguments. + @end deftypefn + ++@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2}) ++If defined, the mode-switching pass uses this hook to back-propagate mode ++requirements through blocks that have no mode requirements of their own. ++Specifically, @var{mode1} is the mode that @var{entity} has on exit ++from a block B1 (say) and @var{mode2} is the mode that the next block ++requires @var{entity} to have. B1 does not have any mode requirements ++of its own. ++ ++The hook should return the mode that it prefers or requires @var{entity} ++to have in B1, or the number of modes if there is no such requirement.
++If the hook returns a required mode for more than one of B1's outgoing ++edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}. ++ ++For example, suppose there is a ``one-shot'' entity that, ++for a given execution of a function, either stays off or makes exactly ++one transition from off to on. It is safe to make the transition at any ++time, but it is better not to do so unnecessarily. This hook allows the ++function to manage such an entity without having to track its state at ++runtime. Specifically, the entity would have two modes, 0 for off and ++1 for on, with 2 representing ``don't know''. The system is forbidden from ++transitioning from 2 to 1, since 2 represents the possibility that the ++entity is already on (and the aim is to avoid having to emit code to ++check for that case). This hook would therefore return 1 when @var{mode1} ++is 2 and @var{mode2} is 1, which would force the entity to be on in the ++source block. Applying this inductively would remove all transitions ++in which the previous state is unknown. ++@end deftypefn ++ + @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) + If this hook is defined, it is evaluated for every @var{entity} that + needs mode switching. It should return the mode that @var{entity} is +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index d420e62fd..fcab21744 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context. + + @hook TARGET_MODE_CONFLUENCE + ++@hook TARGET_MODE_BACKPROP ++ + @hook TARGET_MODE_ENTRY + + @hook TARGET_MODE_EXIT +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 065767902..c2a0f0294 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -81,6 +81,7 @@ struct bb_info + int computing; + int mode_out; + int mode_in; ++ int single_succ; + }; + + /* Clear mode I from entity J in bitmap B. */ +@@ -508,6 +509,9 @@ struct + /* Information about each basic block, indexed by block id. */ + struct bb_info *bb_info; + ++ /* A bitmap of blocks for which the current entity is transparent. */ ++ sbitmap transp; ++ + /* The entity that we're processing. */ + int entity; + +@@ -579,6 +583,210 @@ forward_transfer (int bb_index) + return true; + } + ++/* A backwards confluence function. Update the bb_info single_succ ++ field for E's source block, based on changes to E's destination block. ++ At the end of the dataflow problem, single_succ is the single mode ++ that all successors require (directly or indirectly), or no_mode ++ if there are conflicting requirements. ++ ++ Initially, a value of no_mode + 1 means "don't know". */ ++ ++static bool ++single_succ_confluence_n (edge e) ++{ ++ /* The entry block has no associated mode information. */ ++ if (e->src->index == ENTRY_BLOCK) ++ return false; ++ ++ /* We don't control mode changes across abnormal edges. */ ++ if (e->flags & EDGE_ABNORMAL) ++ return false; ++ ++ /* Do nothing if we've already found a conflict. */ ++ struct bb_info *bb_info = confluence_info.bb_info; ++ int no_mode = confluence_info.no_mode; ++ int src_mode = bb_info[e->src->index].single_succ; ++ if (src_mode == no_mode) ++ return false; ++ ++ /* Work out what mode the destination block (or its successors) require.
*/ ++ int dest_mode; ++ if (e->dest->index == EXIT_BLOCK) ++ dest_mode = no_mode; ++ else if (bitmap_bit_p (confluence_info.transp, e->dest->index)) ++ dest_mode = bb_info[e->dest->index].single_succ; ++ else ++ dest_mode = bb_info[e->dest->index].seginfo->mode; ++ ++ /* Do nothing if the destination block has no new information. */ ++ if (dest_mode == no_mode + 1 || dest_mode == src_mode) ++ return false; ++ ++ /* Detect conflicting modes. */ ++ if (src_mode != no_mode + 1) ++ dest_mode = no_mode; ++ ++ bb_info[e->src->index].single_succ = dest_mode; ++ return true; ++} ++ ++/* A backward transfer function for computing the bb_info single_succ ++ fields, as described above single_succ_confluence. */ ++ ++static bool ++single_succ_transfer (int bb_index) ++{ ++ /* We don't have any field to transfer to. Assume that, after the ++ first iteration, we are only called if single_succ has changed. ++ We should then process incoming edges if the entity is transparent. */ ++ return bitmap_bit_p (confluence_info.transp, bb_index); ++} ++ ++/* Check whether the target wants to back-propagate a mode change across ++ edge E, and update the source block's computed mode if so. Return true
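The tm.texi example above translates almost line for line into a hook body (hypothetical target code; mode numbering as in the text: 0 off, 1 on, 2 unknown, 3 = number of modes):

static int
example_mode_backprop (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* Transitions from "unknown" to "on" are forbidden, so require the
     source block to already be in mode 1.  */
  if (mode1 == 2 && mode2 == 1)
    return 1;
  return 3;	/* number of modes: no requirement */
}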
View file
_service:tar_scm:0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch
Added
@@ -0,0 +1,80 @@ +From 415d38d84b2e363a2d512b54baac5532553f1402 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Wed, 6 Mar 2024 09:19:59 +0800 +Subject: [PATCH 143/188] LoongArch: Use /lib instead of /lib64 as the library + search path for MUSL. + +gcc/ChangeLog: + + * config.gcc: Add a case for loongarch*-*-linux-musl*. + * config/loongarch/linux.h: Disable the multilib-compatible + treatment for *musl* targets. + * config/loongarch/musl.h: New file. +--- + gcc/config.gcc | 3 +++ + gcc/config/loongarch/linux.h | 4 +++- + gcc/config/loongarch/musl.h | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/loongarch/musl.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 039187fa2..499b36b45 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2509,6 +2509,9 @@ riscv*-*-freebsd*) + + loongarch*-*-linux*) + tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" ++ case ${target} in ++ *-linux-musl*) tm_file="${tm_file} loongarch/musl.h" ++ esac + tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" +diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h +index 00039ac18..38aa4da2c 100644 +--- a/gcc/config/loongarch/linux.h ++++ b/gcc/config/loongarch/linux.h +@@ -21,7 +21,9 @@ along with GCC; see the file COPYING3. If not see + * This ensures that a compiler configured with --disable-multilib + * can work in a multilib environment. */ + +-#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH) ++#if !defined(LA_DEFAULT_TARGET_MUSL) \ ++ && defined(LA_DISABLE_MULTILIB) \ ++ && defined(LA_DISABLE_MULTIARCH) + + #if DEFAULT_ABI_BASE == ABI_BASE_LP64D + #define ABI_LIBDIR "lib64" +diff --git a/gcc/config/loongarch/musl.h b/gcc/config/loongarch/musl.h +new file mode 100644 +index 000000000..fa43bc866 +--- /dev/null ++++ b/gcc/config/loongarch/musl.h +@@ -0,0 +1,23 @@ ++/* Definitions for MUSL C library support. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#ifndef LA_DEFAULT_TARGET_MUSL ++#define LA_DEFAULT_TARGET_MUSL ++#endif +-- +2.43.0 +
View file
_service:tar_scm:0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
Added
@@ -0,0 +1,81 @@ +From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Fri, 1 Dec 2023 08:36:15 +0000 +Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper + function + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45 + +SME will add more intrinsics whose expansion code requires +the mode of the function return value. This patch adds an +associated helper routine. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_expander::result_mode): New member function. + * config/aarch64/aarch64-sve-builtins-base.cc + (svld234_impl::expand): Use it. + * config/aarch64/aarch64-sve-builtins.cc + (function_expander::get_reg_target): Likewise. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + gcc/config/aarch64/aarch64-sve-builtins.h | 9 +++++++++ + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index 56c9d75e7..c9bf13792 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1272,7 +1272,7 @@ public: + rtx + expand (function_expander &e) const OVERRIDE + { +- machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); ++ machine_mode tuple_mode = e.result_mode (); + insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index e168c8334..91af96687 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops, + rtx + function_expander::get_reg_target () + { +- machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); ++ machine_mode target_mode = result_mode (); + if (!possible_target || GET_MODE (possible_target) != target_mode) + possible_target = gen_reg_rtx (target_mode); + return possible_target; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 0d130b871..52994cde0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -528,6 +528,8 @@ public: + insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, + machine_mode = E_VOIDmode); + ++ machine_mode result_mode () const; ++ + bool overlaps_input_p (rtx); + + rtx convert_to_pmode (rtx); +@@ -877,6 +879,13 @@ function_base::call_properties (const function_instance &instance) const + return flags; + } + ++/* Return the mode of the result of a call. */ ++inline machine_mode ++function_expander::result_mode () const ++{ ++ return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); ++} ++ + } + + #endif +-- +2.33.0 +
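At call sites the cleanup reads as follows (a fragment that is only meaningful inside the SVE builtin framework shown above):

/* Within a function_expander member function:  */
/* Before: spell out the tree walk each time.  */
machine_mode mode_before = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
/* After: one self-describing helper call with the same value.  */
machine_mode mode_after = e.result_mode ();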
View file
_service:tar_scm:0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch
Added
@@ -0,0 +1,551 @@ +From 2170e0e811cb1b592f7577571f10b5ab95da9eaa Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 25 Oct 2024 06:05:59 +0000 +Subject: [PATCH 144/188] LoongArch: testsuite: Fix problems with incorrect + results in vector test cases. + +In simd_correctness_check.h, the role of the macro ASSERTEQ_64 is to check the +result of the passed vector values for the 64-bit data of each array element. +It turns out that it uses the abs() function to check only the lower 32 bits +of the data at a time, so this patch replaces abs() with the llabs() function. + +However, the following two problems may occur after modification: + +1. FAIL in lasx-xvfrint_s.c and lsx-vfrint_s.c +The reason for the error is that vector test cases that use __m{128,256} to +define vector types are composed of 32-bit primitive types, so they should use +ASSERTEQ_32 instead of ASSERTEQ_64 to check for correctness. + +2. FAIL in lasx-xvshuf_b.c and lsx-vshuf.c +The cause of the error is that the expected result of the function setting in +the test case is incorrect. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c: Replace + ASSERTEQ_64 with the macro ASSERTEQ_32. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Modify the expected + test results of some functions according to the function of the vector + instruction. + * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c: Same + modification as lasx-xvfrint_s.c. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Same + modification as lasx-xvshuf_b.c. + * gcc.target/loongarch/vector/simd_correctness_check.h: Use the llabs() + function instead of abs() to check the correctness of the results. +--- + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 58 +++++++++---------- + .../loongarch/vector/lsx/lsx-vfrint_s.c | 50 ++++++++-------- + .../loongarch/vector/simd_correctness_check.h | 2 +- + 3 files changed, 55 insertions(+), 55 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index fbfe300ea..4538528a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -184,7 +184,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -203,7 +203,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -241,7 +241,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32
(__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -260,7 +260,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -279,7 +279,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -298,7 +298,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -317,7 +317,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x55555555; + *((int *)&__m256_op0[6]) = 0x36aaaaac; +@@ -336,7 +336,7 @@ main () + *((int *)&__m256_result[1]) = 0x55555555; + *((int *)&__m256_result[0]) = 0x80000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -355,7 +355,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffc741; + *((int *)&__m256_op0[6]) = 0x8a023680; +@@ -374,7 +374,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -393,7 +393,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00200101; + *((int *)&__m256_op0[6]) = 0x01610000; +@@ -412,7 +412,7 @@ main () + *((int *)&__m256_result[1]) = 0x3f800000; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -431,7 +431,7 @@ main () + *((int *)&__m256_result[1]) = 0xfefefefe; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int
*)&__m256_op0[7]) = 0x1c1c1c1c; + *((int *)&__m256_op0[6]) = 0x1c1c1c1c; +@@ -450,7 +450,7 @@ main () + *((int *)&__m256_result[1]) = 0xfffffffe; + *((int *)&__m256_result[0]) = 0xffffff00; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -469,7 +469,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -488,7 +488,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -507,7 +507,7 @@ main ()
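For reference, a standalone sketch of the corrected 64-bit check (the real simd_correctness_check.h differs in details; llabs covers the full element where abs silently truncated to 32 bits):

#include <stdio.h>
#include <stdlib.h>

#define ASSERTEQ_64_SKETCH(line, ref, res)                               \
  do                                                                     \
    {                                                                    \
      long long *r_ = (long long *) &(ref);                              \
      long long *o_ = (long long *) &(res);                              \
      for (unsigned i_ = 0; i_ < sizeof (res) / 8; i_++)                 \
        if (llabs (r_[i_] - o_[i_]) > 0)                                 \
          printf ("error at line %d, element %u\n", (int) (line), i_);   \
    }                                                                    \
  while (0)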
_service:tar_scm:0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
Added
@@ -0,0 +1,232 @@ +From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001 +From: Kewen Lin <linkw@linux.ibm.com> +Date: Tue, 15 Nov 2022 20:26:07 -0600 +Subject: PATCH 046/157 BackportSME rtl: Try to remove EH edges after + {pro,epi}logue generation PR90259 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858 + +After prologue and epilogue generation, the judgement on whether +one memory access onto stack frame may trap or not could change, +since we get more exact stack information by now. + +As PR90259 shows, some memory access becomes impossible to trap +any more after prologue and epilogue generation, it can make +subsequent optimization be able to remove it if safe, but it +results in unexpected control flow status due to REG_EH_REGION +note missing. + +This patch proposes to try to remove EH edges with function +purge_all_dead_edges after prologue and epilogue generation, +it simplifies CFG as early as we can and don't need any fixup +in downstream passes. + +CFG simplification result with PR90259's case as example: + +*before* + + 18: %1:TF=call `__gcc_qdiv' argc:0 + REG_EH_REGION 0x2 + 77: NOTE_INSN_BASIC_BLOCK 3 + 19: NOTE_INSN_DELETED + 20: NOTE_INSN_DELETED + 110: %31:SI+0x20=%1:DF + REG_EH_REGION 0x2 + 116: NOTE_INSN_BASIC_BLOCK 4 + 111: %31:SI+0x28=%2:DF + REG_EH_REGION 0x2 + 22: NOTE_INSN_BASIC_BLOCK 5 + 108: %0:DF=%31:SI+0x20 + REG_EH_REGION 0x2 + 117: NOTE_INSN_BASIC_BLOCK 6 + 109: %1:DF=%31:SI+0x28 + REG_EH_REGION 0x2 + 79: NOTE_INSN_BASIC_BLOCK 7 + 26: %31:SI+0x18=%0:DF + 104: pc=L69 + 105: barrier + +*after* + + 18: %1:TF=call `__gcc_qdiv' argc:0 + REG_EH_REGION 0x2 + 77: NOTE_INSN_BASIC_BLOCK 3 + 19: NOTE_INSN_DELETED + 20: NOTE_INSN_DELETED + 110: %31:SI+0x20=%1:DF + 111: %31:SI+0x28=%2:DF + 108: %0:DF=%31:SI+0x20 + 109: %1:DF=%31:SI+0x28 + 26: %31:SI+0x18=%0:DF + 104: pc=L69 + 105: barrier + + PR rtl-optimization/90259 + +gcc/ChangeLog: + + * function.cc (rest_of_handle_thread_prologue_and_epilogue): Add + parameter fun, and call function purge_all_dead_edges. + (pass_thread_prologue_and_epilogue::execute): Name unamed parameter + as fun, and use it for rest_of_handle_thread_prologue_and_epilogue. + +gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr90259.C: New. +--- + gcc/function.cc | 13 ++- + gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++ + 2 files changed, 113 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C + +diff --git a/gcc/function.cc b/gcc/function.cc +index 49c7ccf4b..28de39dd6 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt) + } + + static unsigned int +-rest_of_handle_thread_prologue_and_epilogue (void) ++rest_of_handle_thread_prologue_and_epilogue (function *fun) + { + /* prepare_shrink_wrap is sensitive to the block structure of the control + flow graph, so clean it up first. */ +@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void) + Fix that up. */ + fixup_partitions (); + ++ /* After prologue and epilogue generation, the judgement on whether ++ one memory access onto stack frame may trap or not could change, ++ since we get more exact stack information by now. So try to ++ remove any EH edges here, see PR90259. */ ++ if (fun->can_throw_non_call_exceptions) ++ purge_all_dead_edges (); ++ + /* Shrink-wrapping can result in unreachable edges in the epilogue, + see PR57320. */ + cleanup_cfg (optimize ? 
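As an illustration (ours, not part of the patch; the PR90259 testcase itself is in the diff above), the class of code affected looks like this in C:

  /* Compile with -O2 -fnon-call-exceptions.  The division may trap and
     throw, so it initially ends its basic block with an EH edge; spills
     and reloads of the result are stack-frame accesses that also look
     trapping until the prologue/epilogue pass has fixed the frame
     layout, after which the new purge_all_dead_edges call can drop the
     stale edges.  */
  extern long double num, den;

  long double
  quotient (void)
  {
    return num / den;
  }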
CLEANUP_EXPENSIVE : 0); +@@ -6614,9 +6621,9 @@ public: + {} + + /* opt_pass methods: */ +- virtual unsigned int execute (function *) ++ unsigned int execute (function * fun) final override + { +- return rest_of_handle_thread_prologue_and_epilogue (); ++ return rest_of_handle_thread_prologue_and_epilogue (fun); + } + + }; // class pass_thread_prologue_and_epilogue +diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C +new file mode 100644 +index 000000000..db75ac7fe +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr90259.C +@@ -0,0 +1,103 @@ ++/* { dg-require-effective-target long_double_ibm128 } */ ++/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */ ++/* { dg-add-options long_double_ibm128 } */ ++ ++/* Verify there is no ICE. */ ++ ++template <int a> struct b ++{ ++ static constexpr int c = a; ++}; ++template <bool a> using d = b<a>; ++struct e ++{ ++ int f; ++ int ++ g () ++ { ++ return __builtin_ceil (f / (long double) h); ++ } ++ float h; ++}; ++template <typename, typename> using k = d<!bool ()>; ++template <typename> class n ++{ ++public: ++ e ae; ++ void af (); ++}; ++template <typename l> ++void ++n<l>::af () ++{ ++ ae.g (); ++} ++template <bool> using m = int; ++template <typename ag, typename ah, typename ai = m<k<ag, ah>::c>> ++using aj = n<ai>; ++struct o ++{ ++ void ++ af () ++ { ++ al.af (); ++ } ++ aj<int, int> al; ++}; ++template <typename> class am; ++template <typename i> class ao ++{ ++protected: ++ static i *ap (int); ++}; ++template <typename, typename> class p; ++template <typename ar, typename i, typename... j> class p<ar (j...), i> : ao<i> ++{ ++public: ++ static ar ++ as (const int &p1, j...) ++ { ++ (*ao<i>::ap (p1)) (j ()...); ++ } ++}; ++template <typename ar, typename... j> class am<ar (j...)> ++{ ++ template <typename, typename> using av = int; ++ ++public: ++ template <typename i, typename = av<d<!bool ()>, void>, ++ typename = av<i, void>> ++ am (i); ++ using aw = ar (*) (const int &, j...); ++ aw ax; ++};
_service:tar_scm:0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch
Added
@@ -0,0 +1,130 @@
+From 44a9ae67e19c0d744bd744cb0e9ae9e0069e40f1 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 5 Mar 2024 14:43:04 +0800
+Subject: [PATCH 145/188] LoongArch: Fixed an issue with the implementation of
+ the template atomic_compare_and_swapsi.
+
+If the hardware does not support LAMCAS, atomic_compare_and_swapsi needs to be
+implemented through "ll.w+sc.w". In the implementation of the instruction
+sequence, it is necessary to determine whether the two registers are equal.
+Since LoongArch's comparison instructions do not distinguish between 32-bit
+and 64-bit, the two operand registers being compared must be sign-extended.
+One of them is loaded from memory by the "ll.w" instruction, which guarantees
+that it is sign-extended. However, the other operand register is not
+guaranteed to hold a sign-extended value.
+
+gcc/ChangeLog:
+
+    * config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+    On loongarch64, a sign-extension operation is added when
+    operands[2] is a register operand and the mode is SImode.
+
+gcc/testsuite/ChangeLog:
+
+    * g++.target/loongarch/atomic-cas-int.C: New test.
+---
+ gcc/config/loongarch/sync.md                  | 46 ++++++++++++++-----
+ .../g++.target/loongarch/atomic-cas-int.C     | 32 +++++++++++++
+ 2 files changed, 67 insertions(+), 11 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 5da5c2780..2e008c487 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -245,18 +245,42 @@
+    (clobber (match_scratch:GPR 5 "=&r"))]
+   ""
+ {
+-  return "1:\\n\\t"
+-         "ll.<amo>\\t%0,%1\\n\\t"
+-         "bne\\t%0,%z2,2f\\n\\t"
+-         "or%i3\\t%5,$zero,%3\\n\\t"
+-         "sc.<amo>\\t%5,%1\\n\\t"
+-         "beqz\\t%5,1b\\n\\t"
+-         "b\\t3f\\n\\t"
+-         "2:\\n\\t"
+-         "%G4\\n\\t"
+-         "3:\\n\\t";
++  output_asm_insn ("1:", operands);
++  output_asm_insn ("ll.<amo>\t%0,%1", operands);
++
++  /* Like the test case atomic-cas-int.C, on loongarch64 at -O1 and higher,
++     the return value of val_without_const_folding will not be truncated
++     and will be passed directly to the function compare_exchange_strong.
++     However, the instruction 'bne' does not distinguish between 32-bit and
++     64-bit operations, so if the upper 32 bits of the register are not
++     sign-extended from bit 31, then the comparison may not be valid here.
++     This will affect the result of the operation.  */
++
++  if (TARGET_64BIT && REG_P (operands[2])
++      && GET_MODE (operands[2]) == SImode)
++    {
++      output_asm_insn ("addi.w\t%5,%2,0", operands);
++      output_asm_insn ("bne\t%0,%5,2f", operands);
++    }
++  else
++    output_asm_insn ("bne\t%0,%z2,2f", operands);
++
++  output_asm_insn ("or%i3\t%5,$zero,%3", operands);
++  output_asm_insn ("sc.<amo>\t%5,%1", operands);
++  output_asm_insn ("beqz\t%5,1b", operands);
++  output_asm_insn ("b\t3f", operands);
++  output_asm_insn ("2:", operands);
++  output_asm_insn ("%G4", operands);
++  output_asm_insn ("3:", operands);
++
++  return "";
+ }
+-  [(set (attr "length") (const_int 28))])
++  [(set (attr "length")
++        (if_then_else
++          (and (match_test "GET_MODE (operands[2]) == SImode")
++               (match_test "REG_P (operands[2])"))
++          (const_int 32)
++          (const_int 28)))])
+
+ (define_insn "atomic_cas_value_strong<mode>_amcas"
+   [(set (match_operand:QHWD 0 "register_operand" "=&r")
+diff --git a/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+new file mode 100644
+index 000000000..830ce4826
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+@@ -0,0 +1,32 @@
++/* { dg-do run } */
++/* { dg-options "-O2" } */
++
++#include <atomic>
++#include <cstdio>
++
++__attribute__ ((noinline)) long
++val_without_const_folding (long val)
++{
++  return val;
++}
++
++int
++main ()
++{
++  int oldval = 0xaa;
++  int newval = 0xbb;
++  std::atomic<int> amo;
++
++  amo.store (oldval);
++
++  long longval = val_without_const_folding (0xff80000000000000 + oldval);
++  oldval = static_cast<int> (longval);
++
++  amo.compare_exchange_strong (oldval, newval);
++
++  if (newval != amo.load (std::memory_order_relaxed))
++    __builtin_abort ();
++
++  return 0;
++}
++
+--
+2.43.0
+
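The comparison bug this fixes can be modeled in plain C (our sketch, not code from the patch):

  #include <stdint.h>

  /* 'loaded' models the ll.w result, which is always sign-extended;
     'expected' models operand 2, which may carry garbage in bits 63-32.
     The (int32_t) cast plays the role of the addi.w %5,%2,0 added above:
     without it, a raw 64-bit bne-style comparison of 0xaa against
     0xff800000000000aa would wrongly fail even though the low 32 bits
     match, exactly as in the test case.  */
  static inline int
  cas_compare_matches (int64_t loaded, int64_t expected)
  {
    return loaded == (int64_t) (int32_t) expected;
  }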
_service:tar_scm:0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
Added
@@ -0,0 +1,71 @@
+From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001
+From: Andrew Pinski <apinski@marvell.com>
+Date: Thu, 17 Nov 2022 22:03:08 +0000
+Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after
+ redeclaration error
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e
+
+The problem here is that after we create a call expression
+in the C front-end, we replace the decl's type with
+an error mark node. We then end up calling
+aggregate_value_p on that call expression, whose decl
+now has the error mark as its type, and we ICE.
+
+The fix is to check the function type inside
+aggregate_value_p, after it has been extracted from
+the call expression.
+
+OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
+
+Thanks,
+Andrew Pinski
+
+gcc/ChangeLog:
+
+    PR middle-end/107705
+    * function.cc (aggregate_value_p): Return 0 if
+    the function type was an error operand.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.dg/redecl-22.c: New test.
+---
+ gcc/function.cc                  | 3 +++
+ gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++
+ 2 files changed, 12 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c
+
+diff --git a/gcc/function.cc b/gcc/function.cc
+index 28de39dd6..99aa738eb 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
+   if (VOID_TYPE_P (type))
+     return 0;
+
++  if (error_operand_p (fntype))
++    return 0;
++
+   /* If a record should be passed the same as its first (and only) member
+      don't pass it as an aggregate.  */
+   if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type))
+diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c
+new file mode 100644
+index 000000000..7758570fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-22.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 107705 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++int f (void)
++{
++  int (*p) (void) = 0; // { dg-note "" }
++  return p ();
++  int p = 1; // { dg-error "" }
++}
+--
+2.33.0
+
_service:tar_scm:0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch
Added
@@ -0,0 +1,30 @@
+From eab751e71d4f4d5e9b2eda55d793fd57541fbc56 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 7 Mar 2024 09:44:03 +0800
+Subject: [PATCH 146/188] LoongArch: testsuite: Add compilation options to the
+ regname-fp-s9.c.
+
+When the value of the macro DEFAULT_CFLAGS is set to '-ansi -pedantic-errors',
+regname-fp-s9.c fails. To solve this problem, add the compilation options
+'-Wno-pedantic -std=gnu90' to this test case.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/regname-fp-s9.c: Add compilation options
+    '-Wno-pedantic -std=gnu90'.
+---
+ gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+index d2e3b80f8..77a74f1f6 100644
+--- a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+@@ -1,3 +1,4 @@
+ /* { dg-do compile } */
++/* { dg-additional-options "-Wno-pedantic -std=gnu90" } */
+ register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */
+ register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */
+--
+2.43.0
+
_service:tar_scm:0147-Backport-SME-function-Change-return-type-of-predicat.patch
Added
@@ -0,0 +1,351 @@ +From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001 +From: Uros Bizjak <ubizjak@gmail.com> +Date: Wed, 21 Jun 2023 21:55:30 +0200 +Subject: PATCH 048/157 BackportSME function: Change return type of + predicate function from int to bool + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086 + +Also change some internal variables to bool and some functions to void. + +gcc/ChangeLog: + + * function.h (emit_initial_value_sets): + Change return type from int to void. + (aggregate_value_p): Change return type from int to bool. + (prologue_contains): Ditto. + (epilogue_contains): Ditto. + (prologue_epilogue_contains): Ditto. + * function.cc (temp_slot): Make "in_use" variable bool. + (make_slot_available): Update for changed "in_use" variable. + (assign_stack_temp_for_type): Ditto. + (emit_initial_value_sets): Change return type from int to void + and update function body accordingly. + (instantiate_virtual_regs): Ditto. + (rest_of_handle_thread_prologue_and_epilogue): Ditto. + (safe_insn_predicate): Change return type from int to bool. + (aggregate_value_p): Change return type from int to bool + and update function body accordingly. + (prologue_contains): Change return type from int to bool. + (prologue_epilogue_contains): Ditto. +--- + gcc/function.cc | 77 ++++++++++++++++++++++++------------------------- + gcc/function.h | 10 +++---- + 2 files changed, 42 insertions(+), 45 deletions(-) + +diff --git a/gcc/function.cc b/gcc/function.cc +index 99aa738eb..fc8eb5812 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -578,8 +578,8 @@ public: + tree type; + /* The alignment (in bits) of the slot. */ + unsigned int align; +- /* Nonzero if this temporary is currently in use. */ +- char in_use; ++ /* True if this temporary is currently in use. */ ++ bool in_use; + /* Nesting level at which this slot is being used. 
*/ + int level; + /* The offset of the slot from the frame_pointer, including extra space +@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp) + { + cut_slot_from_list (temp, temp_slots_at_level (temp->level)); + insert_slot_to_list (temp, &avail_temp_slots); +- temp->in_use = 0; ++ temp->in_use = false; + temp->level = -1; + n_temp_slots_in_use--; + } +@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + if (known_ge (best_p->size - rounded_size, alignment)) + { + p = ggc_alloc<temp_slot> (); +- p->in_use = 0; ++ p->in_use = false; + p->size = best_p->size - rounded_size; + p->base_offset = best_p->base_offset + rounded_size; + p->full_size = best_p->full_size - rounded_size; +@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + } + + p = selected; +- p->in_use = 1; ++ p->in_use = true; + p->type = type; + p->level = temp_slot_level; + n_temp_slots_in_use++; +@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno) + return NULL_RTX; + } + +-unsigned int ++void + emit_initial_value_sets (void) + { + struct initial_value_struct *ivs = crtl->hard_reg_initial_vals; +@@ -1348,7 +1348,7 @@ emit_initial_value_sets (void) + rtx_insn *seq; + + if (ivs == 0) +- return 0; ++ return; + + start_sequence (); + for (i = 0; i < ivs->num_entries; i++) +@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void) + end_sequence (); + + emit_insn_at_entry (seq); +- return 0; + } + + /* Return the hardreg-pseudoreg initial values pair entry I and +@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc) + /* A subroutine of instantiate_virtual_regs_in_insn. Return true if X + matches the predicate for insn CODE operand OPERAND. */ + +-static int ++static bool + safe_insn_predicate (int code, int operand, rtx x) + { + return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x); +@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl) + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +-static unsigned int ++static void + instantiate_virtual_regs (void) + { + rtx_insn *insn; +@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void) + /* Indicate that, from now on, assign_stack_local should use + frame_pointer_rtx. */ + virtuals_instantiated = 1; +- +- return 0; + } + + namespace { +@@ -2031,7 +2028,8 @@ public: + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { +- return instantiate_virtual_regs (); ++ instantiate_virtual_regs (); ++ return 0; + } + + }; // class pass_instantiate_virtual_regs +@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt) + } + +  +-/* Return 1 if EXP is an aggregate type (or a value with aggregate type). ++/* Return true if EXP is an aggregate type (or a value with aggregate type). + This means a type for which function calls must pass an address to the + function or get an address back from the function. + EXP may be a type node or an expression (whose type is tested). */ + +-int ++bool + aggregate_value_p (const_tree exp, const_tree fntype) + { + const_tree type = (TYPE_P (exp)) ? exp : TREE_TYPE (exp); +@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) + else + /* For internal functions, assume nothing needs to be + returned in memory. 
*/ +- return 0; ++ return false; + } + break; + case FUNCTION_DECL: +@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype) + } + + if (VOID_TYPE_P (type)) +- return 0; ++ return false; + + if (error_operand_p (fntype)) +- return 0; ++ return false; + + /* If a record should be passed the same as its first (and only) member + don't pass it as an aggregate. */ +@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype) + reference, do so. */ + if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL) + && DECL_BY_REFERENCE (exp)) +- return 1; ++ return true; + + /* Function types that are TREE_ADDRESSABLE force return in memory. */ + if (fntype && TREE_ADDRESSABLE (fntype)) +- return 1; ++ return true; + + /* Types that are TREE_ADDRESSABLE must be constructed in memory, + and thus can't be returned in registers. */ + if (TREE_ADDRESSABLE (type)) +- return 1; ++ return true; + + if (TYPE_EMPTY_P (type)) +- return 0; ++ return false; +
_service:tar_scm:0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch
Added
@@ -0,0 +1,137 @@
+From 465f0653b6e7bf5adb5d1f6c9e8aff2b81a3f27f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 26 Jan 2024 18:28:32 +0800
+Subject: [PATCH 147/188] LoongArch: Emit R_LARCH_RELAX for TLS IE with
+ non-extreme code model to allow the IE to LE linker relaxation
+
+In Binutils we need to make IE to LE relaxation only allowed when there
+is an R_LARCH_RELAX after R_LARCH_TLS_IE_PC_{HI20,LO12} so an invalid
+"partial" relaxation won't happen with the extreme code model. So if we
+are emitting %ie_pc_{hi20,lo12} in a non-extreme code model, emit an
+R_LARCH_RELAX to allow the relaxation. The IE to LE relaxation does not
+require the pcalau12i and the ld instruction to be adjacent, so we don't
+need to limit ourselves to using the macro.
+
+For the distro maintainers backporting changes: this change depends on
+r14-8721; without r14-8721, R_LARCH_RELAX can be emitted mistakenly in
+the extreme code model.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+    Support 'Q' for R_LARCH_RELAX for TLS IE.
+    (loongarch_output_move): Use 'Q' to print R_LARCH_RELAX for TLS
+    IE.
+    * config/loongarch/loongarch.md (ld_from_got<mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/tls-ie-relax.c: New test.
+    * gcc.target/loongarch/tls-ie-norelax.c: New test.
+    * gcc.target/loongarch/tls-ie-extreme.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc                 | 15 ++++++++++++++-
+ gcc/config/loongarch/loongarch.md                 |  2 +-
+ .../gcc.target/loongarch/tls-ie-extreme.c         |  5 +++++
+ .../gcc.target/loongarch/tls-ie-norelax.c         |  5 +++++
+ gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c | 11 +++++++++++
+ 5 files changed, 36 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index d23b09cc5..c1dc30b61 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4977,7 +4977,7 @@ loongarch_output_move (rtx dest, rtx src)
+       if (type == SYMBOL_TLS_LE)
+         return "lu12i.w\t%0,%h1";
+       else
+-        return "pcalau12i\t%0,%h1";
++        return "%Q1pcalau12i\t%0,%h1";
+     }
+
+   if (src_code == CONST_INT)
+@@ -6141,6 +6141,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+    'L'  Print the low-part relocation associated with OP.
+    'm'  Print one less than CONST_INT OP in decimal.
+    'N'  Print the inverse of the integer branch condition for comparison OP.
++   'Q'  Print R_LARCH_RELAX for TLS IE.
+    'r'  Print address 12-31bit relocation associated with OP.
+    'R'  Print address 32-51bit relocation associated with OP.
+    'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+@@ -6278,6 +6279,18 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+                         letter);
+       break;
+
++    case 'Q':
++      if (!TARGET_LINKER_RELAXATION)
++        break;
++
++      if (code == HIGH)
++        op = XEXP (op, 0);
++
++      if (loongarch_classify_symbolic_expression (op) == SYMBOL_TLS_IE)
++        fprintf (file, ".reloc\t.,R_LARCH_RELAX\n\t");
++
++      break;
++
+    case 'r':
+      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+                                     true /* lo_reloc */);
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 248ad12bb..d2c7c3b05 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2620,7 +2620,7 @@
+            (match_operand:P 2 "symbolic_operand")))]
+          UNSPEC_LOAD_FROM_GOT))]
+   ""
+-  "ld.<d>\t%0,%1,%L2"
++  "%Q2ld.<d>\t%0,%1,%L2"
+   [(set_attr "type" "move")]
+ )
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+new file mode 100644
+index 000000000..00c545a3e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mcmodel=extreme -mexplicit-relocs=auto -mrelax" } */
++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */
++
++#include "tls-ie-relax.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+new file mode 100644
+index 000000000..dd6bf3634
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mno-relax" } */
++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */
++
++#include "tls-ie-relax.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+new file mode 100644
+index 000000000..e9f7569b1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mrelax" } */
++/* { dg-final { scan-assembler-times "R_LARCH_RELAX" 2 { target tls_native } } } */
++
++extern __thread int errno;
++
++void
++unimplemented (void)
++{
++  errno = -38;
++}
+--
+2.43.0
+
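For concreteness, the two relocations the tests count come out roughly as below (our sketch; the register choice and the final load are assumptions, not patch output):

  extern __thread int tls_var;   /* initial-exec TLS access */

  int
  read_tls (void)
  {
    /* Expected assembly under -mexplicit-relocs -mrelax (abridged):
         .reloc    .,R_LARCH_RELAX
         pcalau12i $r12,%ie_pc_hi20(tls_var)
         .reloc    .,R_LARCH_RELAX
         ld.d      $r12,$r12,%ie_pc_lo12(tls_var)
         ldx.w     $r4,$r12,$r2     # $r2 is the thread pointer
       which the linker may now relax to a direct LE sequence.  */
    return tls_var;
  }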
_service:tar_scm:0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
Added
@@ -0,0 +1,233 @@ +From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:28:46 +0000 +Subject: PATCH 049/157 BackportSME Allow prologues and epilogues to be + inserted later + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702 + +Arm's SME adds a new processor mode called streaming mode. +This mode enables some new (matrix-oriented) instructions and +disables several existing groups of instructions, such as most +Advanced SIMD vector instructions and a much smaller set of SVE +instructions. It can also change the current vector length. + +There are instructions to switch in and out of streaming mode. +However, their effect on the ISA and vector length can't be represented +directly in RTL, so they need to be emitted late in the pass pipeline, +close to md_reorg. + +It's sometimes the responsibility of the prologue and epilogue to +switch modes, which means we need to emit the prologue and epilogue +sequences late as well. (This loses shrink-wrapping and scheduling +opportunities, but that's a price worth paying.) + +This patch therefore adds a target hook for forcing prologue +and epilogue insertion to happen later in the pipeline. + +gcc/ + * target.def (use_late_prologue_epilogue): New hook. + * doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE. + * doc/tm.texi: Regenerate. + * passes.def (pass_late_thread_prologue_and_epilogue): New pass. + * tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare. + * function.cc (pass_thread_prologue_and_epilogue::gate): New function. + (pass_data_late_thread_prologue_and_epilogue): New pass variable. + (pass_late_thread_prologue_and_epilogue): New pass class. + (make_pass_late_thread_prologue_and_epilogue): New function. +--- + gcc/doc/tm.texi | 19 ++++++++++++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/passes.def | 3 +++ + gcc/target.def | 21 +++++++++++++++++++ + gcc/tree-pass.h | 2 ++ + 6 files changed, 97 insertions(+) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 5f0972356..d930d233d 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE () ++Return true if the current function's prologue and epilogue should ++be emitted late in the pass pipeline, instead of at the usual point. ++ ++Normally, the prologue and epilogue sequences are introduced soon after ++register allocation is complete. The advantage of this approach is that ++it allows the prologue and epilogue instructions to be optimized and ++scheduled with other code in the function. However, some targets ++require the prologue and epilogue to be the first and last sequences ++executed by the function, with no variation allowed. This hook should ++return true on such targets. ++ ++The default implementation returns false, which is correct for most ++targets. The hook should only return true if there is a specific ++target limitation that cannot be described in RTL. For example, ++the hook might return true if the prologue and epilogue need to switch ++between instruction sets. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. 
The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index fcab21744..19eabec48 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index fc8eb5812..7c90b5f23 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see + #include "function-abi.h" + #include "value-range.h" + #include "gimple-range.h" ++#include "insn-attr.h" + + /* So we can assign to cfun in this file. */ + #undef cfun +@@ -6620,6 +6621,11 @@ public: + {} + + /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return !targetm.use_late_prologue_epilogue (); ++ } ++ + unsigned int execute (function * fun) final override + { + rest_of_handle_thread_prologue_and_epilogue (fun); +@@ -6628,6 +6634,44 @@ public: + + }; // class pass_thread_prologue_and_epilogue + ++const pass_data pass_data_late_thread_prologue_and_epilogue = ++{ ++ RTL_PASS, /* type */ ++ "late_pro_and_epilogue", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */ ++}; ++ ++class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass ++{ ++public: ++ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return targetm.use_late_prologue_epilogue (); ++ } ++ ++ unsigned int execute (function *fn) final override ++ { ++ /* It's not currently possible to have both delay slots and ++ late prologue/epilogue, since the latter has to run before ++ the former, and the former won't honor whatever restrictions ++ the latter is trying to enforce. */ ++ gcc_assert (!DELAY_SLOTS); ++ rest_of_handle_thread_prologue_and_epilogue (fn); ++ return 0; ++ } ++}; // class pass_late_thread_prologue_and_epilogue ++ + } // anon namespace + + rtl_opt_pass * +@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt) + return new pass_thread_prologue_and_epilogue (ctxt); + } + ++rtl_opt_pass * ++make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++{ ++ return new pass_late_thread_prologue_and_epilogue (ctxt); ++} ++ + namespace { + + const pass_data pass_data_zero_call_used_regs = +diff --git a/gcc/passes.def b/gcc/passes.def +index cdc600298..8797f166f 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_stack_regs_run); + POP_INSERT_PASSES () + POP_INSERT_PASSES () ++ NEXT_PASS (pass_late_thread_prologue_and_epilogue); ++ /* No target-independent code motion is allowed beyond this point, ++ excepting the legacy delayed-branch pass. 
*/ + NEXT_PASS (pass_late_compilation); + PUSH_INSERT_PASSES_WITHIN (pass_late_compilation) + NEXT_PASS (pass_zero_call_used_regs); +diff --git a/gcc/target.def b/gcc/target.def +index 4d77c1523..fd4899612 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.", + machine_mode, (machine_mode m1, machine_mode m2), + default_cc_modes_compatible) + ++DEFHOOK ++(use_late_prologue_epilogue, ++ "Return true if the current function's prologue and epilogue should\n\ ++be emitted late in the pass pipeline, instead of at the usual point.\n\ ++\n\ ++Normally, the prologue and epilogue sequences are introduced soon after\n\
_service:tar_scm:0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch
Added
@@ -0,0 +1,57 @@
+From acc00029aab3cdd1186f1ed4a93db2205cdd166c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 13 Mar 2024 20:44:38 +0800
+Subject: [PATCH 148/188] LoongArch: Remove unused and incorrect
+ "sge<u>_<X:mode><GPR:mode>" define_insn
+
+If this insn is really used, we'll have something like
+
+  slti $r4,$r0,$r5
+
+in the code. The assembler will reject it because slti wants 2
+register operands and 1 immediate operand. But we've not got any bug
+report for this, indicating this define_insn is not used at all.
+
+Note that do_store_flag (in expr.cc) is already converting x >= 1 to
+x > 0 unconditionally, so this define_insn is indeed unused and we can
+just remove it.
+
+gcc/ChangeLog:
+
+    * config/loongarch/loongarch.md (any_ge): Remove.
+    (sge<u>_<X:mode><GPR:mode>): Remove.
+---
+ gcc/config/loongarch/loongarch.md | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index d2c7c3b05..1b3525dde 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -517,7 +517,6 @@
+ ;; These code iterators allow the signed and unsigned scc operations to use
+ ;; the same template.
+ (define_code_iterator any_gt [gt gtu])
+-(define_code_iterator any_ge [ge geu])
+ (define_code_iterator any_lt [lt ltu])
+ (define_code_iterator any_le [le leu])
+
+@@ -3355,15 +3354,6 @@
+   [(set_attr "type" "slt")
+    (set_attr "mode" "<X:MODE>")])
+
+-(define_insn "*sge<u>_<X:mode><GPR:mode>"
+-  [(set (match_operand:GPR 0 "register_operand" "=r")
+-        (any_ge:GPR (match_operand:X 1 "register_operand" "r")
+-                    (const_int 1)))]
+-  ""
+-  "slt<u>i\t%0,%.,%1"
+-  [(set_attr "type" "slt")
+-   (set_attr "mode" "<X:MODE>")])
+-
+ (define_insn "*slt<u>_<X:mode><GPR:mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=r")
+        (any_lt:GPR (match_operand:X 1 "register_operand" "r")
+--
+2.43.0
+
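The do_store_flag canonicalization mentioned above is easy to see from the source side (our illustration, not part of the patch):

  /* Both functions reach the backend as 'x > 0', so the removed any_ge
     pattern matching (const_int 1) could never be selected.  */
  int ge_one  (long x) { return x >= 1; }
  int gt_zero (long x) { return x > 0; }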
_service:tar_scm:0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
Added
@@ -0,0 +1,239 @@ +From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:35:57 +0000 +Subject: PATCH 050/157 BackportSME Add a target hook for sibcall + epilogues + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a + +Epilogues for sibling calls are generated using the +sibcall_epilogue pattern. One disadvantage of this approach +is that the target doesn't know which call the epilogue is for, +even though the code that generates the pattern has the call +to hand. + +Although call instructions are currently rtxes, and so could be +passed as an operand to the pattern, the main point of introducing +rtx_insn was to move towards separating the rtx and insn types +(a good thing IMO). There also isn't an existing practice of +passing genuine instructions (as opposed to labels) to +instruction patterns. + +This patch therefore adds a hook that can be defined as an +alternative to sibcall_epilogue. The advantage is that it +can be passed the call; the disadvantage is that it can't +use .md conveniences like generating instructions from +textual patterns (although most epilogues are too complex +to benefit much from that anyway). + +gcc/ + * doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL. + * doc/tm.texi: Regenerate. + * target.def (emit_epilogue_for_sibcall): New hook. + * calls.cc (can_implement_as_sibling_call_p): Use it. + * function.cc (thread_prologue_and_epilogue_insns): Likewise. + (reposition_prologue_and_epilogue_notes): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take + an rtx_call_insn * rather than a bool. + * config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise. + (TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define. + * config/aarch64/aarch64.md (epilogue): Update call. + (sibcall_epilogue): Delete. 
+--- + gcc/calls.cc | 3 ++- + gcc/config/aarch64/aarch64-protos.h | 2 +- + gcc/config/aarch64/aarch64.cc | 11 +++++++---- + gcc/config/aarch64/aarch64.md | 11 +---------- + gcc/doc/tm.texi | 8 ++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 15 +++++++++++++-- + gcc/target.def | 9 +++++++++ + 8 files changed, 43 insertions(+), 18 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index 4d0bc45be..c1db66883 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp, + tree addr, + const args_size &args_size) + { +- if (!targetm.have_sibcall_epilogue ()) ++ if (!targetm.have_sibcall_epilogue () ++ && !targetm.emit_epilogue_for_sibcall) + { + maybe_complain_about_tail_call + (exp, +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 86e444a60..97984f3ab 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); + const char * aarch64_output_probe_stack_range (rtx, rtx); + const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); + void aarch64_err_no_fpadvsimd (machine_mode); +-void aarch64_expand_epilogue (bool); ++void aarch64_expand_epilogue (rtx_call_insn *); + rtx aarch64_ptrue_all (unsigned int); + opt_machine_mode aarch64_ptrue_all_mode (rtx); + rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index fd1114b52..055b436b1 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void) + from a deallocated stack, and we optimize the unwind records by + emitting them all together if possible. */ + void +-aarch64_expand_epilogue (bool for_sibcall) ++aarch64_expand_epilogue (rtx_call_insn *sibcall) + { + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall) + explicitly authenticate. + */ + if (aarch64_return_address_signing_enabled () +- && (for_sibcall || !TARGET_ARMV8_3)) ++ && (sibcall || !TARGET_ARMV8_3)) + { + switch (aarch64_ra_sign_key) + { +@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + /* Stack adjustment for exception handler. */ +- if (crtl->calls_eh_return && !for_sibcall) ++ if (crtl->calls_eh_return && !sibcall) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA +@@ -10223,7 +10223,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + emit_use (gen_rtx_REG (DImode, LR_REGNUM)); +- if (!for_sibcall) ++ if (!sibcall) + emit_jump_insn (ret_rtx); + } + +@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_HAVE_SHADOW_CALL_STACK + #define TARGET_HAVE_SHADOW_CALL_STACK true + ++#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-aarch64.h" +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7267a74d6..a78476c8a 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -871,16 +871,7 @@ + (clobber (const_int 0)) + "" + " +- aarch64_expand_epilogue (false); +- DONE; +- " +-) +- +-(define_expand "sibcall_epilogue" +- (clobber (const_int 0)) +- "" +- " +- aarch64_expand_epilogue (true); ++ aarch64_expand_epilogue (nullptr); + DONE; + " + ) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d930d233d..369f4b8da 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11703,6 +11703,14 @@ the hook might return true if the prologue and epilogue need to switch + between instruction sets. + @end deftypefn + ++@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call}) ++If defined, this hook emits an epilogue sequence for sibling (tail) ++call instruction @var{call}. Another way of providing epilogues ++for sibling calls is to define the @code{sibcall_epilogue} instruction ++pattern; the main advantage of this hook over the pattern is that it ++has access to the call instruction. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 19eabec48..748b0777a 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7710,6 +7710,8 @@ to by @var{ce_info}. + + @hook TARGET_USE_LATE_PROLOGUE_EPILOGUE + ++@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index 7c90b5f23..ddab43ca4 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void) + if (!(CALL_P (insn) && SIBLING_CALL_P (insn))) + continue; + +- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ()) ++ rtx_insn *ep_seq; ++ if (targetm.emit_epilogue_for_sibcall) ++ { ++ start_sequence (); ++ targetm.emit_epilogue_for_sibcall (as_a<rtx_call_insn *> (insn)); ++ ep_seq = get_insns (); ++ end_sequence ();
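A target adopting the hook replaces its sibcall_epilogue expander with a sketch like this (ours, modeled on the aarch64 change above):

  /* The epilogue emitter now sees the sibcall instruction itself; a null
     argument means a normal epilogue ending in a return.  */
  static void
  example_expand_epilogue (rtx_call_insn *sibcall)
  {
    /* ... deallocate the frame and restore saved registers ...  */
    if (!sibcall)
      emit_jump_insn (ret_rtx);
    /* For a sibcall, control falls through into the tail call.  */
  }

  #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
  #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL example_expand_epilogue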
_service:tar_scm:0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch
Added
@@ -0,0 +1,85 @@
+From 0dba1a1daef3f043235382f0e8f107313b9bde07 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Thu, 14 Mar 2024 09:26:54 +0800
+Subject: [PATCH 149/188] LoongArch: Remove masking process for operand 3 of
+ xvpermi.q.
+
+The behavior of non-zero unused bits in xvpermi.q instruction's
+third operand is undefined on LoongArch. According to our
+discussion (https://github.com/llvm/llvm-project/pull/83540),
+we think it is better to keep the insn operand unmodified.
+
+This patch partially reverts 7b158e036a95b1ab40793dd53bed7dbd770ffdaf.
+
+gcc/ChangeLog:
+
+    * config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>):
+    Remove masking of operand 3.
+
+gcc/testsuite/ChangeLog:
+
+    * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c:
+    Reposition operand 3's value into the instruction's defined
+    accepted range.
+---
+ gcc/config/loongarch/lasx.md                          | 5 -----
+ .../gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c | 6 +++---
+ 2 files changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 38f35bad6..f3b5ea373 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -640,8 +640,6 @@
+   (set_attr "mode" "<MODE>")])
+
+ ;; xvpermi.q
+-;; Unused bits in operands[3] need be set to 0 to avoid
+-;; causing undefined behavior on LA464.
+ (define_insn "lasx_xvpermi_q_<LASX:mode>"
+   [(set (match_operand:LASX 0 "register_operand" "=f")
+        (unspec:LASX
+          [(match_operand:LASX 1 "register_operand" "0")
+           (match_operand:LASX 2 "register_operand" "f")
+           (match_operand 3 "const_uimm8_operand")]
+          UNSPEC_LASX_XVPERMI_Q))]
+   "ISA_HAS_LASX"
+ {
+-  int mask = 0x33;
+-  mask &= INTVAL (operands[3]);
+-  operands[3] = GEN_INT (mask);
+   return "xvpermi.q\t%u0,%u2,%3";
+ }
+   [(set_attr "type" "simd_splat")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+index dbc29d2fb..f89dfc311 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+@@ -27,7 +27,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000;
+   *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d;
+   *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x22);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000;
+@@ -42,7 +42,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c;
+   *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000;
+   *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x31);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff;
+@@ -57,7 +57,7 @@ main ()
+   *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000;
+   *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff;
+   *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff;
+-  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca);
++  __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x02);
+   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+   return 0;
+--
+2.43.0
+
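A usage note (ours, not from the patch): with the in-compiler masking gone, callers are responsible for keeping the undefined selector bits clear, e.g.:

  #include <lasxintrin.h>

  /* Only imm[1:0] and imm[5:4] of the immediate select 128-bit lanes --
     the bits the old 0x33 mask kept.  The other bits are undefined on
     the hardware, which is why the test immediates above became 0x22,
     0x31 and 0x02.  */
  __m256i
  combine_lanes (__m256i xd, __m256i xj)
  {
    return __lasx_xvpermi_q (xd, xj, 0x31);
  }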
_service:tar_scm:0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
Added
@@ -0,0 +1,461 @@ +From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:44:52 +0000 +Subject: PATCH 051/157 BackportSME Add a new target hook: + TARGET_START_CALL_ARGS + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2 + +We have the following two hooks into the call expansion code: + +- TARGET_CALL_ARGS is called for each argument before arguments + are moved into hard registers. + +- TARGET_END_CALL_ARGS is called after the end of the call + sequence (specifically, after any return value has been + moved to a pseudo). + +This patch adds a TARGET_START_CALL_ARGS hook that is called before +the TARGET_CALL_ARGS sequence. This means that TARGET_START_CALL_REGS +and TARGET_END_CALL_REGS bracket the region in which argument registers +might be live. They also bracket a region in which the only call +emiitted by target-independent code is the call to the target function +itself. (For example, TARGET_START_CALL_ARGS happens after any use of +memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any +use of memcpy to copy the result.) + +Also, the patch adds the cumulative argument structure as an argument +to the hooks, so that the target can use it to record and retrieve +information about the call as a whole. + +The TARGET_CALL_ARGS docs said: + + While generating RTL for a function call, this target hook is invoked once + for each argument passed to the function, either a register returned by + ``TARGET_FUNCTION_ARG`` or a memory location. It is called just +- before the point where argument registers are stored. + +The last bit was true for normal calls, but for libcalls the hook was +invoked earlier, before stack arguments have been copied. I don't think +this caused a practical difference for nvptx (the only port to use the +hooks) since I wouldn't expect any libcalls to take stack parameters. + +gcc/ + * doc/tm.texi.in: Add TARGET_START_CALL_ARGS. + * doc/tm.texi: Regenerate. + * target.def (start_call_args): New hook. + (call_args, end_call_args): Add a parameter for the cumulative + argument information. + * hooks.h (hook_void_rtx_tree): Delete. + * hooks.cc (hook_void_rtx_tree): Likewise. + * targhooks.h (hook_void_CUMULATIVE_ARGS): Declare. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * targhooks.cc (hook_void_CUMULATIVE_ARGS): New function. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * calls.cc (expand_call): Call start_call_args before computing + and storing stack parameters. Pass the cumulative argument + information to call_args and end_call_args. + (emit_library_call_value_1): Likewise. + * config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative + argument parameter. + (nvptx_end_call_args): Likewise. +--- + gcc/calls.cc | 61 +++++++++++++++++++++------------------ + gcc/config/nvptx/nvptx.cc | 4 +-- + gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++------- + gcc/doc/tm.texi.in | 2 ++ + gcc/hooks.cc | 5 ---- + gcc/hooks.h | 1 - + gcc/target.def | 59 +++++++++++++++++++++++++++++-------- + gcc/targhooks.cc | 10 +++++++ + gcc/targhooks.h | 5 ++-- + 9 files changed, 140 insertions(+), 60 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index c1db66883..4a8535cc6 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore) + sibcall_failure = 1; + } + ++ /* Set up the next argument register. 
For sibling calls on machines ++ with register windows this should be the incoming register. */ ++ if (pass == 0) ++ next_arg_reg = targetm.calls.function_incoming_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ else ++ next_arg_reg = targetm.calls.function_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ ++ targetm.calls.start_call_args (args_so_far); ++ + bool any_regs = false; + for (i = 0; i < num_actuals; i++) + if (argsi.reg != NULL_RTX) + { + any_regs = true; +- targetm.calls.call_args (argsi.reg, funtype); ++ targetm.calls.call_args (args_so_far, argsi.reg, funtype); + } + if (!any_regs) +- targetm.calls.call_args (pc_rtx, funtype); ++ targetm.calls.call_args (args_so_far, pc_rtx, funtype); + + /* Figure out the register where the value, if any, will come back. */ + valreg = 0; +@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore) + later safely search backwards to find the CALL_INSN. */ + before_call = get_last_insn (); + +- /* Set up next argument register. For sibling calls on machines +- with register windows this should be the incoming register. */ +- if (pass == 0) +- next_arg_reg = targetm.calls.function_incoming_arg +- (args_so_far, function_arg_info::end_marker ()); +- else +- next_arg_reg = targetm.calls.function_arg +- (args_so_far, function_arg_info::end_marker ()); +- + if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) + { + int arg_nr = return_flags & ERF_RETURN_ARG_MASK; +@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore) + for (i = 0; i < num_actuals; ++i) + free (argsi.aligned_regs); + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + insns = get_insns (); + end_sequence (); +@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + } + #endif + +- /* When expanding a normal call, args are stored in push order, +- which is the reverse of what we have here. */ +- bool any_regs = false; +- for (int i = nargs; i-- > 0; ) +- if (argveci.reg != NULL_RTX) +- { +- targetm.calls.call_args (argveci.reg, NULL_TREE); +- any_regs = true; +- } +- if (!any_regs) +- targetm.calls.call_args (pc_rtx, NULL_TREE); ++ rtx call_cookie ++ = targetm.calls.function_arg (args_so_far, ++ function_arg_info::end_marker ()); + + /* Push the args that need to be pushed. */ + +@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + + fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); + ++ targetm.calls.start_call_args (args_so_far); ++ ++ /* When expanding a normal call, args are stored in push order, ++ which is the reverse of what we have here. */ ++ bool any_regs = false; ++ for (int i = nargs; i-- > 0; ) ++ if (argveci.reg != NULL_RTX) ++ { ++ targetm.calls.call_args (args_so_far, argveci.reg, NULL_TREE); ++ any_regs = true; ++ } ++ if (!any_regs) ++ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE); ++ + /* Now load any reg parms into their regs. 
*/ + + /* ARGNUM indexes the ARGVEC array in the order in which the arguments +@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + get_identifier (XSTR (orgfun, 0)), + build_function_type (tfom, NULL_TREE), + original_args_size.constant, args_size.constant, +- struct_value_size, +- targetm.calls.function_arg (args_so_far, +- function_arg_info::end_marker ()), +- valreg, ++ struct_value_size, call_cookie, valreg, + old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); + + if (flag_ipa_ra) +@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg)); + } + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + /* For calls to `setjmp', etc., inform function.cc:setjmp_warnings + that it should complain if nonvolatile values are live. For +diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc +index 3634a49de..7f2103ba6 100644 +--- a/gcc/config/nvptx/nvptx.cc ++++ b/gcc/config/nvptx/nvptx.cc +@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void)
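The bracketing the new hook establishes can be sketched as follows (our illustration; the state variable is invented, not from the patch):

  /* Hypothetical target use: between start_call_args and end_call_args,
     argument registers may be live, and the only call emitted by
     target-independent code is the call being expanded.  */
  static bool example_in_call_p;

  static void
  example_start_call_args (cumulative_args_t)
  {
    example_in_call_p = true;
  }

  static void
  example_end_call_args (cumulative_args_t)
  {
    example_in_call_p = false;
  }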
_service:tar_scm:0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch
Added
@@ -0,0 +1,48 @@
+From 3ed698858f0ebb12a99ed1cc12c038b533f64b2c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 25 Oct 2024 06:15:21 +0000
+Subject: [PATCH 150/188] LoongArch: Fix C23 (...) functions returning large
+ aggregates [PR114175]
+
+We were assuming TYPE_NO_NAMED_ARGS_STDARG_P functions don't have any
+named arguments and there is nothing to advance, but that is not the
+case for (...) functions returning by hidden reference, which have one
+such artificial argument. This is causing gcc.dg/c23-stdarg-6.c and
+gcc.dg/c23-stdarg-8.c to fail.
+
+Fix the issue by checking if arg.type is NULL, as r14-9503 explains.
+
+gcc/ChangeLog:
+
+    PR target/114175
+    * config/loongarch/loongarch.cc
+    (loongarch_setup_incoming_varargs): Only skip
+    loongarch_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P
+    functions if arg.type is NULL.
+---
+ gcc/config/loongarch/loongarch.cc | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c1dc30b61..1e3981e19 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -767,7 +767,14 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum,
+      argument. Advance a local copy of CUM past the last "real" named
+      argument, to find out how many registers are left over.  */
+   local_cum = *get_cumulative_args (cum);
+-  loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
++
++  /* For a C23 variadic function w/o any named argument, and w/o an
++     artificial argument for large return value, skip advancing args.
++     There is such an artificial argument iff arg.type is non-NULL
++     (PR 114175).  */
++  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
++      || arg.type != NULL_TREE)
++    loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
+
+   /* Found out how many registers we need to save.  */
+   gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
+--
+2.43.0
+
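The construct being fixed looks like this (our example in the spirit of gcc.dg/c23-stdarg-6.c, not copied from the testsuite):

  #include <stdarg.h>

  struct big { long x[8]; };   /* returned via hidden reference */

  /* C23: no named parameters, yet the hidden return pointer is one
     artificial named argument that must still be advanced past.  */
  struct big
  f (...)
  {
    va_list ap;
    va_start (ap);             /* C23 form, no second argument */
    struct big b = { { va_arg (ap, long) } };
    va_end (ap);
    return b;
  }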
_service:tar_scm:0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
Added
@@ -0,0 +1,490 @@ +From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:52:41 +0000 +Subject: PATCH 052/157 BackportSME Allow targets to add USEs to asms + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba + +Arm's SME has an array called ZA that for inline asm purposes +is effectively a form of special-purpose memory. It doesn't +have an associated storage type and so can't be passed and +returned in normal C/C++ objects. + +We'd therefore like "za" in a clobber list to mean that an inline +asm can read from and write to ZA. (Just reading or writing +individually is unlikely to be useful, but we could add syntax +for that too if necessary.) + +There is currently a TARGET_MD_ASM_ADJUST target hook that allows +targets to add clobbers to an asm instruction. This patch +extends that to allow targets to add USEs as well. + +gcc/ + * target.def (md_asm_adjust): Add a uses parameter. + * doc/tm.texi: Regenerate. + * cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust. + Handle any USEs created by the target. + (expand_asm_stmt): Likewise. + * recog.cc (asm_noperands): Handle asms with USEs. + (decode_asm_operands): Likewise. + * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses + parameter. + * config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise. + * config/arm/arm.cc (thumb1_md_asm_adjust): Likewise. + * config/avr/avr.cc (avr_md_asm_adjust): Likewise. + * config/cris/cris.cc (cris_md_asm_adjust): Likewise. + * config/i386/i386.cc (ix86_md_asm_adjust): Likewise. + * config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise. + * config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise. + * config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise. + * config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise. + * config/s390/s390.cc (s390_md_asm_adjust): Likewise. + * config/vax/vax.cc (vax_md_asm_adjust): Likewise. + * config/visium/visium.cc (visium_md_asm_adjust): Likewise. 
+--- + gcc/cfgexpand.cc | 37 +++++++++++++++++++--------- + gcc/config/arm/aarch-common-protos.h | 2 +- + gcc/config/arm/aarch-common.cc | 3 ++- + gcc/config/arm/arm.cc | 5 ++-- + gcc/config/avr/avr.cc | 1 + + gcc/config/cris/cris.cc | 6 +++-- + gcc/config/i386/i386.cc | 5 ++-- + gcc/config/mn10300/mn10300.cc | 3 ++- + gcc/config/nds32/nds32.cc | 4 +-- + gcc/config/pdp11/pdp11.cc | 6 +++-- + gcc/config/rs6000/rs6000.cc | 3 ++- + gcc/config/s390/s390.cc | 3 ++- + gcc/config/vax/vax.cc | 4 ++- + gcc/config/visium/visium.cc | 5 ++-- + gcc/doc/tm.texi | 5 ++-- + gcc/recog.cc | 20 ++++++++++----- + gcc/target.def | 5 ++-- + 17 files changed, 77 insertions(+), 40 deletions(-) + +diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc +index 4691355aa..5401a4ebd 100644 +--- a/gcc/cfgexpand.cc ++++ b/gcc/cfgexpand.cc +@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus) + auto_vec<rtx> input_rvec, output_rvec; + auto_vec<machine_mode> input_mode; + auto_vec<const char *> constraints; ++ auto_vec<rtx> use_rvec; + auto_vec<rtx> clobber_rvec; + HARD_REG_SET clobbered_regs; + CLEAR_HARD_REG_SET (clobbered_regs); +@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus) + + if (targetm.md_asm_adjust) + targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + asm_op = body; + nclobbers = clobber_rvec.length (); +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers)); ++ auto nuses = use_rvec.length (); ++ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers)); + +- XVECEXP (body, 0, 0) = asm_op; +- for (i = 0; i < nclobbers; i++) +- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rveci); ++ i = 0; ++ XVECEXP (body, 0, i++) = asm_op; ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ for (rtx clobber : clobber_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobber); + } + + emit_insn (body); +@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt) + maintaining source-level compatibility means automatically clobbering + the flags register. */ + rtx_insn *after_md_seq = NULL; ++ auto_vec<rtx> use_rvec; + if (targetm.md_asm_adjust) + after_md_seq + = targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + /* Do not allow the hook to change the output and input count, + lest it mess up the operand numbering. */ +@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt) + gcc_assert (input_rvec.length() == ninputs); + gcc_assert (constraints.length() == noutputs + ninputs); + +- /* But it certainly can adjust the clobbers. */ ++ /* But it certainly can adjust the uses and clobbers. */ ++ unsigned nuses = use_rvec.length (); + unsigned nclobbers = clobber_rvec.length (); + + /* Third pass checks for easy conflicts. */ +@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt) + ARGVEC CONSTRAINTS OPNAMES)) + If there is more than one, put them inside a PARALLEL. */ + +- if (noutputs == 0 && nclobbers == 0) ++ if (noutputs == 0 && nuses == 0 && nclobbers == 0) + { + /* No output operands: put in a raw ASM_OPERANDS rtx. 
*/ + if (nlabels > 0) +@@ -3535,7 +3542,7 @@ expand_asm_stmt (gasm *stmt) + else + emit_insn (body); + } +- else if (noutputs == 1 && nclobbers == 0) ++ else if (noutputs == 1 && nuses == 0 && nclobbers == 0) + { + ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints0; + if (nlabels > 0) +@@ -3551,7 +3558,8 @@ expand_asm_stmt (gasm *stmt) + if (num == 0) + num = 1; + +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers)); ++ body = gen_rtx_PARALLEL (VOIDmode, ++ rtvec_alloc (num + nuses + nclobbers)); + + /* For each output operand, store a SET. */ + for (i = 0; i < noutputs; ++i) +@@ -3578,6 +3586,11 @@ expand_asm_stmt (gasm *stmt) + if (i == 0) + XVECEXP (body, 0, i++) = obody; + ++ /* Add the uses specified by the target hook. No checking should ++ be needed since this doesn't come directly from user code. */ ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ + /* Store (clobber REG) for each clobbered register specified. */ + for (unsigned j = 0; j < nclobbers; ++j) + { +diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h +index ae0465159..3b525c174 100644 +--- a/gcc/config/arm/aarch-common-protos.h ++++ b/gcc/config/arm/aarch-common-protos.h +@@ -149,7 +149,7 @@ struct cpu_cost_table + + rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, ++ vec<const char *> &constraints, vec<rtx> &, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, + location_t loc); + +diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc +index 04a53d750..365cfc140 100644 +--- a/gcc/config/arm/aarch-common.cc ++++ b/gcc/config/arm/aarch-common.cc +@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) + rtx_insn * + arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> & /*clobbers*/, ++ vec<const char *> &constraints, ++ vec<rtx> & /*uses*/, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/, location_t loc) + { + bool saw_asm_flag = false; +diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc +index b700c23b8..c72e9c0b0 100644 +--- a/gcc/config/arm/arm.cc ++++ b/gcc/config/arm/arm.cc +@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
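For readers coming to this patch cold, here is a user-level sketch of where the new "uses" vector matters. This is a hypothetical example: accepting "za" in a clobber list depends on the rest of the SME series, and the patch above only adds the generic hook plumbing that such a clobber expands to.

  /* Sketch only: assumes an SME-enabled compiler and the SME attributes
     added later in this series.  The point is that "za" in the clobber
     list tells GCC the asm may read and write the ZA array state, which
     the backend models by attaching USE (and clobber) rtxes to the asm
     insn via the extended md_asm_adjust hook.  */
  void
  touch_za (void)
  {
    __asm__ volatile ("" /* some instruction updating ZA */
                      ::: "za", "memory");
  }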
View file
_service:tar_scm:0151-LoongArch-Remove-unused-useless-definitions.patch
Added
@@ -0,0 +1,123 @@ +From 6ee300fd31e000efba141ed8806e56bd03826197 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 15 Mar 2024 09:30:25 +0800 +Subject: PATCH 151/188 LoongArch: Remove unused/useless definitions. + +This patch removes some unnecessary definitions of target hook functions +according to the documentation of GCC. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_cfun_has_cprestore_slot_p): Delete. + (loongarch_adjust_insn_length): Delete. + (current_section_name): Delete. + (loongarch_split_symbol_type): Delete. + * config/loongarch/loongarch.cc + (loongarch_case_values_threshold): Delete. + (loongarch_spill_class): Delete. + (TARGET_OPTAB_SUPPORTED_P): Delete. + (TARGET_CASE_VALUES_THRESHOLD): Delete. + (TARGET_SPILL_CLASS): Delete. +--- + gcc/config/loongarch/loongarch-protos.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 26 ------------------------- + 2 files changed, 31 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 87b94e8b0..3dac20279 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -93,7 +93,6 @@ extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); + extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); +-extern bool loongarch_cfun_has_cprestore_slot_p (void); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_vec_cmp (rtx *); +@@ -135,7 +134,6 @@ extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, + unsigned int, + machine_mode); +-extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, + const char *, + const char *); +@@ -157,7 +155,6 @@ extern bool loongarch_global_symbol_noweak_p (const_rtx); + extern bool loongarch_weak_symbol_p (const_rtx); + extern bool loongarch_symbol_binds_local_p (const_rtx); + +-extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); +@@ -198,8 +195,6 @@ extern bool loongarch_epilogue_uses (unsigned int); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool loongarch_split_symbol_type (enum loongarch_symbol_type); + +-typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); +- + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1e3981e19..903c0d4ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10812,23 +10812,6 @@ loongarch_expand_vec_cmp (rtx operands) + return true; + } + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +-/* Implement TARGET_SPILL_CLASS. 
*/ +- +-static reg_class_t +-loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) +-{ +- return NO_REGS; +-} +- + /* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + + /* This function is equivalent to default_promote_function_mode_always_promote +@@ -11283,9 +11266,6 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + +-#undef TARGET_OPTAB_SUPPORTED_P +-#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p +- + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +@@ -11355,18 +11335,12 @@ loongarch_asm_code_end (void) + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + + #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS + #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +-#undef TARGET_SPILL_CLASS +-#define TARGET_SPILL_CLASS loongarch_spill_class +- + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK +-- +2.43.0 +
View file
_service:tar_scm:0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
Added
@@ -0,0 +1,998 @@ +From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:47:46 +0100 +Subject: PATCH 053/157 BackportSME New compact syntax for insn and + insn_split in Machine Descriptions. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217 + +This patch adds support for a compact syntax for specifying constraints in +instruction patterns. Credit for the idea goes to Richard Earnshaw. + +With this new syntax we want a clean break from the current limitations to make +something that is hopefully easier to use and maintain. + +The idea behind this compact syntax is that often times it's quite hard to +correlate the entries in the constrains list, attributes and instruction lists. + +One has to count and this often is tedious. Additionally when changing a single +line in the insn multiple lines in a diff change, making it harder to see what's +going on. + +This new syntax takes into account many of the common things that are done in MD +files. It's also worth saying that this version is intended to deal with the +common case of a string based alternatives. For C chunks we have some ideas +but those are not intended to be addressed here. + +It's easiest to explain with an example: + +normal syntax: + +(define_insn_and_split "*movsi_aarch64" + (set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" + "@ + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %1 + # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); + ldr\\t%w0, %1 + ldr\\t%s0, %1 + str\\t%w1, %0 + str\\t%s1, %0 + adrp\\t%x0, %A1\;ldr\\t%w0, %x0, %L1 + adr\\t%x0, %c1 + adrp\\t%x0, %A1 + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1 + * return aarch64_output_scalar_simd_mov_immediate (operands1, SImode);" + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" + (const_int 0) + "{ + aarch64_expand_mov_immediate (operands0, operands1); + DONE; + }" + ;; The "mov_imm" type for CNT is just a placeholder. + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, + load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") + +) + +New syntax: + +(define_insn_and_split "*movsi_aarch64" + (set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "aarch64_mov_operand")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" + {@ cons: =0, 1; attrs: type, arch, length + r , r ; mov_reg , * , 4 mov\t%w0, %w1 + k , r ; mov_reg , * , 4 ^ + r , k ; mov_reg , * , 4 ^ + r , M ; mov_imm , * , 4 mov\t%w0, %1 + r , n ; mov_imm , * ,16 # + /* The "mov_imm" type for CNT is just a placeholder. 
*/ + r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); + r , m ; load_4 , * , 4 ldr\t%w0, %1 + w , m ; load_4 , fp , 4 ldr\t%s0, %1 + m , rZ ; store_4 , * , 4 str\t%w1, %0 + m , w ; store_4 , fp , 4 str\t%s1, %0 + r , Usw; load_4 , * , 8 adrp\t%x0, %A1;ldr\t%w0, %x0, %L1 + r , Usa; adr , * , 4 adr\t%x0, %c1 + r , Ush; adr , * , 4 adrp\t%x0, %A1 + w , rZ ; f_mcr , fp , 4 fmov\t%s0, %w1 + r , w ; f_mrc , fp , 4 fmov\t%w0, %s1 + w , w ; fmov , fp , 4 fmov\t%s0, %s1 + w , Ds ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, SImode); + } + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" + (const_int 0) + { + aarch64_expand_mov_immediate (operands0, operands1); + DONE; + } +) + +The main syntax rules are as follows (See docs for full rules): + - Template must start with "{@" and end with "}" to use the new syntax. + - "{@" is followed by a layout in parentheses which is "cons:" followed by + a list of match_operand/match_scratch IDs, then a semicolon, then the + same for attributes ("attrs:"). Both sections are optional (so you can + use only cons, or only attrs, or both), and cons must come before attrs + if present. + - Each alternative begins with any amount of whitespace. + - Following the whitespace is a comma-separated list of constraints and/or + attributes within brackets , with sections separated by a semicolon. + - Following the closing '' is any amount of whitespace, and then the actual + asm output. + - Spaces are allowed in the list (they will simply be removed). + - All alternatives should be specified: a blank list should be + ",,", ",,;," etc., not "" or "" (however genattr may segfault if + you leave certain attributes empty, I have found). + - The actual constraint string in the match_operand or match_scratch, and + the attribute string in the set_attr, must be blank or an empty string + (you can't combine the old and new syntaxes). + - The common idion * return can be shortened by using <<. + - Any unexpanded iterators left during processing will result in an error at + compile time. If for some reason <> is needed in the output then these + must be escaped using \. + - Within an {@ block both multiline and singleline C comments are allowed, but + when used outside of a C block they must be the only non-whitespace blocks on + the line + - Inside an {@ block any unexpanded iterators will result in a compile time + fault instead of incorrect assembly being generated at runtime. If the + literal <> is needed in the output this needs to be escaped with \<\>. + - This check is not performed inside C blocks (lines starting with *). + - Instead of copying the previous instruction again in the next pattern, one + can use ^ to refer to the previous asm string. + +This patch works by blindly transforming the new syntax into the old syntax, +so it doesn't do extensive checking. However, it does verify that: + - The correct number of constraints/attributes are specified. + - You haven't mixed old and new syntax. + - The specified operand IDs/attribute names actually exist. + - You don't have duplicate cons + +If something goes wrong, it may write invalid constraints/attributes/template +back into the rtx. But this shouldn't matter because error_at will cause the +program to fail on exit anyway. 
+ +Because this transformation occurs as early as possible (before patterns are +queued), the rest of the compiler can completely ignore the new syntax and +assume that the old syntax will always be used. + +This doesn't seem to have any measurable effect on the runtime of gen* +programs. + +gcc/ChangeLog: + + * gensupport.cc (class conlist, add_constraints, add_attributes, + skip_spaces, expect_char, preprocess_compact_syntax, + parse_section_layout, parse_section, convert_syntax): New. + (process_rtx): Check for conversion. + * genoutput.cc (process_template): Check for unresolved iterators. + (class data): Add compact_syntax_p. + (gen_insn): Use it. + * gensupport.h (compact_syntax): New. + (hash-set.h): Include. + * doc/md.texi: Document it. + +Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com> +--- + gcc/doc/md.texi | 163 +++++++++++++++ + gcc/genoutput.cc | 48 ++++- + gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/gensupport.h | 3 + + 4 files changed, 709 insertions(+), 3 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 3b544358b..04ace8f7f 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -27,6 +27,7 @@ See the next chapter for information on the C header file. + from such an insn. + * Output Statement:: For more generality, write C code to output + the assembler code. ++* Compact Syntax:: Compact syntax for writing machine descriptors. + * Predicates:: Controlling what kinds of operands can be used + for an insn. + * Constraints:: Fine-tuning operand selection. +@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template: + @end group + @end smallexample + ++@node Compact Syntax ++@section Compact Syntax ++@cindex compact syntax ++ ++When a @code{define_insn} or @code{define_insn_and_split} has multiple ++alternatives it may be beneficial to use the compact syntax when specifying ++alternatives.
View file
_service:tar_scm:0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch
Added
@@ -0,0 +1,110 @@
+From d569e34b29faee3658014b3900e9553a4880dac0 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 15 Mar 2024 09:30:26 +0800
+Subject: [PATCH 152/188] LoongArch: Change loongarch_expand_vec_cmp()'s
+ return type from bool to void.
+
+This function always returns true at the end of its implementation,
+so the return value is useless.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_cmp<mode><mode256_i>): Remove checking
+	of loongarch_expand_vec_cmp()'s return value.
+	(vec_cmpu<ILASX:mode><mode256_i>): Ditto.
+	* config/loongarch/lsx.md (vec_cmp<mode><mode_i>): Ditto.
+	(vec_cmpu<ILSX:mode><mode_i>): Ditto.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_vec_cmp): Change loongarch_expand_vec_cmp()'s return
+	type from bool to void.
+	* config/loongarch/loongarch.cc (loongarch_expand_vec_cmp): Ditto.
+---
+ gcc/config/loongarch/lasx.md            | 6 ++----
+ gcc/config/loongarch/loongarch-protos.h | 2 +-
+ gcc/config/loongarch/loongarch.cc       | 3 +--
+ gcc/config/loongarch/lsx.md             | 6 ++----
+ 4 files changed, 6 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index f3b5ea373..45a0a8cc8 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -1378,8 +1378,7 @@
+	   (match_operand:LASX 3 "register_operand")]))]
+   "ISA_HAS_LASX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+@@ -1390,8 +1389,7 @@
+	   (match_operand:ILASX 3 "register_operand")]))]
+   "ISA_HAS_LASX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 3dac20279..8523da8d6 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -95,7 +95,7 @@ extern void loongarch_split_lsx_fill_d (rtx, rtx);
+ extern const char *loongarch_output_move (rtx, rtx);
+ #ifdef RTX_CODE
+ extern void loongarch_expand_scc (rtx *);
+-extern bool loongarch_expand_vec_cmp (rtx *);
++extern void loongarch_expand_vec_cmp (rtx *);
+ extern void loongarch_expand_conditional_branch (rtx *);
+ extern void loongarch_expand_conditional_move (rtx *);
+ extern void loongarch_expand_conditional_trap (rtx);
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 903c0d4ef..dbb318660 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10803,13 +10803,12 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+ }
+
+ /* Expand integer vector comparison */
+-bool
++void
+ loongarch_expand_vec_cmp (rtx *operands)
+ {
+
+   rtx_code code = GET_CODE (operands[1]);
+   loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]);
+-  return true;
+ }
+
+ /* Implement TARGET_PROMOTE_FUNCTION_MODE.  */
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index d5aa3f46f..dc81093e9 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -518,8 +518,7 @@
+	   (match_operand:LSX 3 "register_operand")]))]
+   "ISA_HAS_LSX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+@@ -530,8 +529,7 @@
+	   (match_operand:ILSX 3 "register_operand")]))]
+   "ISA_HAS_LSX"
+ {
+-  bool ok = loongarch_expand_vec_cmp (operands);
+-  gcc_assert (ok);
++  loongarch_expand_vec_cmp (operands);
+   DONE;
+ })
+
+--
+2.43.0
+
View file
_service:tar_scm:0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
Added
@@ -0,0 +1,104 @@
+From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Wed, 13 Sep 2023 14:50:30 +0100
+Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new
+ compact syntax
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7
+
+Hi all,
+
+this is to add support to the new compact pattern syntax for the case
+where the constraints do appear unsorted like:
+
+(define_insn "*<optab>si3_insn_uxtw"
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI (SHIFT_no_rotate:SI
+	  (match_operand:SI 1 "register_operand")
+	  (match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))]
+  ""
+  {@ [cons: =0, 2, 1]
+     [r, Uss, r] <shift>\\t%w0, %w1, %2
+     [r, r, r]   <shift>\\t%w0, %w1, %w2
+  }
+  [(set_attr "type" "bfx,shift_reg")]
+)
+
+Best Regards
+
+  Andrea
+
+gcc/Changelog
+
+2023-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gensupport.cc (convert_syntax): Updated to support unordered
+	constraints in compact syntax.
+---
+ gcc/gensupport.cc | 32 ++++++++++++++++----------------
+ 1 file changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
+index 23c61dcdd..97c614850 100644
+--- a/gcc/gensupport.cc
++++ b/gcc/gensupport.cc
+@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc)
+
+   parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+-  /* Check for any duplicate cons entries and sort based on i.  */
+-  for (auto e : tconvec)
+-    {
+-      unsigned idx = e.idx;
+-      if (idx >= convec.size ())
+-	convec.resize (idx + 1);
+-
+-      if (convec[idx].idx >= 0)
+-	fatal_at (loc, "duplicate cons number found: %d", idx);
+-      convec[idx] = e;
+-    }
+-  tconvec.clear ();
+-
+   if (*templ != ']')
+     {
+       if (*templ == ';')
+@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc)
+	   new_templ += '\n';
+	   new_templ.append (buffer);
+	   /* Parse the constraint list, then the attribute list.  */
+-	   if (convec.size () > 0)
+-	     parse_section (&templ, convec.size (), alt_no, convec, loc,
++	   if (tconvec.size () > 0)
++	     parse_section (&templ, tconvec.size (), alt_no, tconvec, loc,
+			    "constraint");
+
+	   if (attrvec.size () > 0)
+	     {
+-	       if (convec.size () > 0 && !expect_char (&templ, ';'))
++	       if (tconvec.size () > 0 && !expect_char (&templ, ';'))
+		 fatal_at (loc, "expected `;' to separate constraints "
+			   "and attributes in alternative %d", alt_no);
+
+@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc)
+       ++alt_no;
+     }
+
++  /* Check for any duplicate cons entries and sort based on i.  */
++  for (auto e : tconvec)
++    {
++      unsigned idx = e.idx;
++      if (idx >= convec.size ())
++	convec.resize (idx + 1);
++
++      if (convec[idx].idx >= 0)
++	fatal_at (loc, "duplicate cons number found: %d", idx);
++      convec[idx] = e;
++    }
++  tconvec.clear ();
++
+   /* Write the constraints and attributes into their proper places.  */
+   if (convec.size () > 0)
+     add_constraints (x, loc, convec);
+--
+2.33.0
+
View file
_service:tar_scm:0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch
Added
@@ -0,0 +1,104 @@
+From 6c4a2fbdabab053a2a0fb1041e3ffccc3d853c97 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 15 Mar 2024 09:30:27 +0800
+Subject: [PATCH 153/188] LoongArch: Combine UNITS_PER_FP_REG and
+ UNITS_PER_FPREG macros.
+
+These macros are completely the same in definition, so we can keep the
+former and eliminate the latter.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_hard_regno_mode_ok_uncached): Combine UNITS_PER_FP_REG and
+	UNITS_PER_FPREG macros.
+	(loongarch_hard_regno_nregs): Ditto.
+	(loongarch_class_max_nregs): Ditto.
+	(loongarch_get_separate_components): Ditto.
+	(loongarch_process_components): Ditto.
+	* config/loongarch/loongarch.h (UNITS_PER_FPREG): Ditto.
+	(UNITS_PER_HWFPVALUE): Ditto.
+	(UNITS_PER_FPVALUE): Ditto.
+---
+ gcc/config/loongarch/loongarch.cc | 10 +++++-----
+ gcc/config/loongarch/loongarch.h  |  7 ++-----
+ 2 files changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index dbb318660..8d9cda165 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6773,7 +6773,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
+	 and TRUNC.  There's no point allowing sizes smaller than a word,
+	 because the FPU has no appropriate load/store instructions.  */
+       if (mclass == MODE_INT)
+-	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
++	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FP_REG;
+     }
+
+   return false;
+@@ -6816,7 +6816,7 @@ loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
+       if (LASX_SUPPORTED_MODE_P (mode))
+	 return 1;
+
+-      return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
++      return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
+     }
+
+   /* All other registers are word-sized.  */
+@@ -6851,7 +6851,7 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
+	   else if (LSX_SUPPORTED_MODE_P (mode))
+	     size = MIN (size, UNITS_PER_LSX_REG);
+	   else
+-	     size = MIN (size, UNITS_PER_FPREG);
++	     size = MIN (size, UNITS_PER_FP_REG);
+	 }
+       left &= ~reg_class_contents[FP_REGS];
+     }
+@@ -8227,7 +8227,7 @@ loongarch_get_separate_components (void)
+	  if (IMM12_OPERAND (offset))
+	    bitmap_set_bit (components, regno);
+
+-	  offset -= UNITS_PER_FPREG;
++	  offset -= UNITS_PER_FP_REG;
+	}
+
+   /* Don't mess with the hard frame pointer.  */
+@@ -8306,7 +8306,7 @@ loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+	 if (bitmap_bit_p (components, regno))
+	   loongarch_save_restore_reg (mode, regno, offset, fn);
+
+-	 offset -= UNITS_PER_FPREG;
++	 offset -= UNITS_PER_FP_REG;
+       }
+ }
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 8bcdb8729..698e42aec 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -138,19 +138,16 @@ along with GCC; see the file COPYING3.  If not see
+ /* Width of a LASX vector register in bits.  */
+ #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT)
+
+-/* For LARCH, width of a floating point register.  */
+-#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+-
+ /* The largest size of value that can be held in floating-point
+    registers and moved with a single instruction.  */
+ #define UNITS_PER_HWFPVALUE \
+-  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FPREG)
++  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG)
+
+ /* The largest size of value that can be held in floating-point
+    registers.  */
+ #define UNITS_PER_FPVALUE \
+   (TARGET_SOFT_FLOAT ? 0 \
+-   : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \
++   : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
+    : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+
+ /* The number of bytes in a double.  */
+--
+2.43.0
+
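The formula this patch standardises on is worth a worked example. Below is a minimal stand-alone sketch of the ceiling division in loongarch_hard_regno_nregs, with illustrative register widths (8 bytes under -mdouble-float, 4 under -msingle-float); the helper name is made up:

  #include <stdio.h>

  /* Mirror of the ceiling division in loongarch_hard_regno_nregs:
     how many FP registers a value of MODE_SIZE bytes occupies.  */
  static unsigned
  fp_nregs (unsigned mode_size, unsigned units_per_fp_reg)
  {
    return (mode_size + units_per_fp_reg - 1) / units_per_fp_reg;
  }

  int
  main (void)
  {
    printf ("%u\n", fp_nregs (8, 8));  /* DFmode, 8-byte FPRs: 1 reg  */
    printf ("%u\n", fp_nregs (8, 4));  /* DFmode, 4-byte FPRs: 2 regs */
    return 0;
  }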
View file
_service:tar_scm:0154-Backport-SME-recog-Support-space-in-cons.patch
Added
@@ -0,0 +1,49 @@
+From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001
+From: Andrea Corallo <andrea.corallo@arm.com>
+Date: Fri, 15 Sep 2023 10:23:02 +0200
+Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons"
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d
+
+Hi all,
+
+this is to allow for spaces before "cons:" in the definitions of
+patterns using the new compact syntax, ex:
+
+(define_insn "aarch64_simd_dup<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+	(vec_duplicate:VDQ_I
+	  (match_operand:<VEL> 1 "register_operand")))]
+  "TARGET_SIMD"
+  {@ [cons: =0 , 1  ; attrs: type ]
+     [w    , w  ; neon_dup<q>     ] dup\t%0.<Vtype>, %1.<Vetype>[0]
+     [w    , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
+  }
+)
+
+gcc/Changelog
+
+2023-09-20  Andrea Corallo  <andrea.corallo@arm.com>
+
+	* gensupport.cc (convert_syntax): Skip spaces before "cons:"
+	in new compact pattern syntax.
+---
+ gcc/gensupport.cc | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
+index 97c614850..3d7a6d4fd 100644
+--- a/gcc/gensupport.cc
++++ b/gcc/gensupport.cc
+@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc)
+   if (!expect_char (&templ, '['))
+     fatal_at (loc, "expecing `[' to begin section list");
+
++  skip_spaces (&templ);
++
+   parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+   if (*templ != ']')
+--
+2.33.0
+
View file
_service:tar_scm:0154-LoongArch-Fix-a-typo-PR-114407.patch
Added
@@ -0,0 +1,30 @@
+From 72f18deb0b8e59cc23f25cb99b59a25a0a1d99c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 21 Mar 2024 04:01:17 +0800
+Subject: [PATCH 154/188] LoongArch: Fix a typo [PR 114407]
+
+gcc/ChangeLog:
+
+	PR target/114407
+	* config/loongarch/loongarch-opts.cc (loongarch_config_target):
+	Fix typo in diagnostic message, enabing -> enabling.
+---
+ gcc/config/loongarch/loongarch-opts.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index 2ea3972d1..bdecfaf49 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -362,7 +362,7 @@ config_target_isa:
+       gcc_assert (constrained.simd);
+
+       inform (UNKNOWN_LOCATION,
+-	      "enabing %qs promotes %<%s%s%> to %<%s%s%>",
++	      "enabling %qs promotes %<%s%s%> to %<%s%s%>",
+	      loongarch_isa_ext_strings[t.isa.simd],
+	      OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu],
+	      OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]);
+--
+2.43.0
+
View file
_service:tar_scm:0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
Added
@@ -0,0 +1,164 @@ +From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:18 +0000 +Subject: PATCH 056/157 BackportSME aarch64: Generalise + require_immediate_lane_index + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106 + +require_immediate_lane_index previously hard-coded the assumption +that the group size is determined by the argument immediately before +the index. However, for SME, there are cases where it should be +determined by an earlier argument instead. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h: + (function_checker::require_immediate_lane_index): Add an argument + for the index of the indexed vector argument. + * config/aarch64/aarch64-sve-builtins.cc + (function_checker::require_immediate_lane_index): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (ternary_bfloat_lane_base::check): Update accordingly. + (ternary_qq_lane_base::check): Likewise. + (binary_lane_def::check): Likewise. + (binary_long_lane_def::check): Likewise. + (ternary_lane_def::check): Likewise. + (ternary_lane_rotate_def::check): Likewise. + (ternary_long_lane_def::check): Likewise. + (ternary_qq_lane_rotate_def::check): Likewise. +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 16 ++++++++-------- + gcc/config/aarch64/aarch64-sve-builtins.cc | 18 ++++++++++++------ + gcc/config/aarch64/aarch64-sve-builtins.h | 3 ++- + 3 files changed, 22 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index f57f92698..4fa4181b9 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3, N); ++ return c.require_immediate_lane_index (3, 2, N); + } + }; + +@@ -956,7 +956,7 @@ struct ternary_qq_lane_base + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3, 4); ++ return c.require_immediate_lane_index (3, 0); + } + }; + +@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (2); ++ return c.require_immediate_lane_index (2, 1); + } + }; + SHAPE (binary_lane) +@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (2); ++ return c.require_immediate_lane_index (2, 1); + } + }; + SHAPE (binary_long_lane) +@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3); ++ return c.require_immediate_lane_index (3, 2); + } + }; + SHAPE (ternary_lane) +@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return (c.require_immediate_lane_index (3, 2) ++ return (c.require_immediate_lane_index (3, 2, 2) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } + }; +@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def + bool + check (function_checker &c) const OVERRIDE + { +- return c.require_immediate_lane_index (3); ++ return c.require_immediate_lane_index (3, 2); + } + }; + SHAPE 
(ternary_long_lane) +@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0> + bool + check (function_checker &c) const OVERRIDE + { +- return (c.require_immediate_lane_index (3, 4) ++ return (c.require_immediate_lane_index (3, 0) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 91af96687..7924cdf0f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type) + return false; + } + +-/* Check that argument REL_ARGNO is suitable for indexing argument +- REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts +- from the end of the predication arguments. */ ++/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into ++ groups of GROUP_SIZE elements. Return true if argument REL_ARGNO is ++ a suitable constant index for selecting one of these groups. The ++ selection happens within a 128-bit quadword, rather than the whole vector. ++ ++ REL_ARGNO and REL_VEC_ARGNO count from the end of the predication ++ arguments. */ + bool + function_checker::require_immediate_lane_index (unsigned int rel_argno, ++ unsigned int rel_vec_argno, + unsigned int group_size) + { + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + +- /* Get the type of the previous argument. tree_argument_type wants a +- 1-based number, whereas ARGNO is 0-based. */ +- machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); ++ /* Get the type of the vector argument. tree_argument_type wants a ++ 1-based number, whereas VEC_ARGNO is 0-based. */ ++ unsigned int vec_argno = m_base_arg + rel_vec_argno; ++ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1)); + gcc_assert (VECTOR_MODE_P (mode)); + unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); + return require_immediate_range (rel_argno, 0, nlanes - 1); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 52994cde0..824c31cd7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -463,7 +463,8 @@ public: + bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, + HOST_WIDE_INT); + bool require_immediate_enum (unsigned int, tree); +- bool require_immediate_lane_index (unsigned int, unsigned int = 1); ++ bool require_immediate_lane_index (unsigned int, unsigned int, ++ unsigned int = 1); + bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, + HOST_WIDE_INT, HOST_WIDE_INT); + bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); +-- +2.33.0 +
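The range check behind the new interface is simple arithmetic: within a 128-bit quadword, an index that selects groups of GROUP_SIZE elements of a given bit width can range over 128 / (group_size * unit_bitsize) lanes. A stand-alone sketch with illustrative widths (not taken from any specific intrinsic):

  #include <stdio.h>

  /* Mirrors the computation in require_immediate_lane_index: lane
     selection happens within a 128-bit quadword, so a constant lane
     index must lie in [0, nlanes - 1].  */
  static unsigned
  max_lane_index (unsigned group_size, unsigned unit_bitsize)
  {
    unsigned nlanes = 128 / (group_size * unit_bitsize);
    return nlanes - 1;
  }

  int
  main (void)
  {
    printf ("%u\n", max_lane_index (1, 32));  /* 32-bit lanes: 0..3      */
    printf ("%u\n", max_lane_index (2, 16));  /* pairs of 16-bit: 0..3   */
    printf ("%u\n", max_lane_index (1, 16));  /* 16-bit lanes: 0..7      */
    return 0;
  }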
View file
_service:tar_scm:0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch
Added
@@ -0,0 +1,68 @@
+From e27123a020e7bf0845a9804a4b09fe4ce57992f0 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 6 Feb 2024 17:49:50 +0800
+Subject: [PATCH 155/188] testsuite: Add a test case for negating FP vectors
+ containing zeros
+
+Recently I've fixed two wrong FP vector negate implementations which
+caused wrong sign bits in zeros in targets (r14-8786 and r14-8801).  To
+prevent a similar issue from happening again, add a test case.
+
+Tested on x86_64 (with SSE2, AVX, AVX2, and AVX512F), AArch64, MIPS
+(with MSA), LoongArch (with LSX and LASX).
+
+gcc/testsuite:
+
+	* gcc.dg/vect/vect-neg-zero.c: New test.
+---
+ gcc/testsuite/gcc.dg/vect/vect-neg-zero.c | 38 +++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+new file mode 100644
+index 000000000..21fa00cfa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+@@ -0,0 +1,38 @@
++/* { dg-add-options ieee } */
++/* { dg-additional-options "-fno-associative-math -fsigned-zeros" } */
++
++double x[4] = {-0.0, 0.0, -0.0, 0.0};
++float y[8] = {-0.0, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0};
++
++static __attribute__ ((always_inline)) inline void
++test (int factor)
++{
++  double a[4];
++  float b[8];
++
++  asm ("" ::: "memory");
++
++  for (int i = 0; i < 2 * factor; i++)
++    a[i] = -x[i];
++
++  for (int i = 0; i < 4 * factor; i++)
++    b[i] = -y[i];
++
++#pragma GCC novector
++  for (int i = 0; i < 2 * factor; i++)
++    if (__builtin_signbit (a[i]) == __builtin_signbit (x[i]))
++      __builtin_abort ();
++
++#pragma GCC novector
++  for (int i = 0; i < 4 * factor; i++)
++    if (__builtin_signbit (b[i]) == __builtin_signbit (y[i]))
++      __builtin_abort ();
++}
++
++int
++main (void)
++{
++  test (1);
++  test (2);
++  return 0;
++}
+--
+2.43.0
+
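The invariant the test enforces can be shown in a few lines of portable C: IEEE 754 negation flips the sign bit even for zeros, although the two zeros compare equal, which is exactly why the test inspects __builtin_signbit rather than using ==:

  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    double z = 0.0;
    /* Negating a zero must flip its sign bit...  */
    printf ("signbit(0.0) = %d, signbit(-0.0) = %d\n",
            signbit (z) != 0, signbit (-z) != 0);
    /* ...even though +0.0 and -0.0 compare equal, so == cannot
       distinguish a correct negation from a broken one.  */
    printf ("0.0 == -0.0: %d\n", 0.0 == -0.0);
    return 0;
  }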
View file
_service:tar_scm:0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
Added
@@ -0,0 +1,469 @@ +From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001 +From: Christophe Lyon <christophe.lyon@arm.com> +Date: Fri, 11 Mar 2022 16:21:02 +0000 +Subject: PATCH 057/157 BackportSME aarch64: Add backend support for DFP + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22 + +This patch updates the aarch64 backend as needed to support DFP modes +(SD, DD and TD). + +Changes v1->v2: + +* Drop support for DFP modes in + aarch64_gen_{load||store}wb_pair as these are only used in + prologue/epilogue where DFP modes are not used. Drop the + changes to the corresponding patterns in aarch64.md, and + useless GPF_PAIR iterator. + +* In aarch64_reinterpret_float_as_int, handle DDmode the same way + as DFmode (needed in case the representation of the + floating-point value can be loaded using mov/movk. + +* In aarch64_float_const_zero_rtx_p, reject constants with DFP + mode: when X is zero, the callers want to emit either '0' or + 'zr' depending on the context, which is not the way 0.0 is + represented in DFP mode (in particular fmov d0, #0 is not right + for DFP). + +* In aarch64_legitimate_constant_p, accept DFP + +2022-03-31 Christophe Lyon <christophe.lyon@arm.com> + + gcc/ + * config/aarch64/aarch64.cc + (aarch64_split_128bit_move): Handle DFP modes. + (aarch64_mode_valid_for_sched_fusion_p): Likewise. + (aarch64_classify_address): Likewise. + (aarch64_legitimize_address_displacement): Likewise. + (aarch64_reinterpret_float_as_int): Likewise. + (aarch64_float_const_zero_rtx_p): Likewise. + (aarch64_can_const_movi_rtx_p): Likewise. + (aarch64_anchor_offset): Likewise. + (aarch64_secondary_reload): Likewise. + (aarch64_rtx_costs): Likewise. + (aarch64_legitimate_constant_p): Likewise. + (aarch64_gimplify_va_arg_expr): Likewise. + (aapcs_vfp_sub_candidate): Likewise. + (aarch64_vfp_is_call_or_return_candidate): Likewise. + (aarch64_output_scalar_simd_mov_immediate): Likewise. + (aarch64_gen_adjusted_ldpstp): Likewise. + (aarch64_scalar_mode_supported_p): Accept DFP modes if enabled. + * config/aarch64/aarch64.md + (movsf_aarch64): Use SFD iterator and rename into + mov<mode>_aarch64. + (movdf_aarch64): Use DFD iterator and rename into + mov<mode>_aarch64. + (movtf_aarch64): Use TFD iterator and rename into + mov<mode>_aarch64. + (split pattern for move TF mode): Use TFD iterator. + * config/aarch64/iterators.md + (GPF_TF_F16_MOV): Add DFP modes. + (SFD, DFD, TFD): New iterators. + (GPF_TF): Add DFP modes. + (TX, DX, DX2): Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++----------- + gcc/config/aarch64/aarch64.md | 34 +++++++------- + gcc/config/aarch64/iterators.md | 24 +++++++--- + 3 files changed, 89 insertions(+), 51 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 055b436b1..02210ed13 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src) + + machine_mode mode = GET_MODE (dst); + +- gcc_assert (mode == TImode || mode == TFmode); ++ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + +@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) + { + return mode == SImode || mode == DImode + || mode == SFmode || mode == DFmode ++ || mode == SDmode || mode == DDmode + || (aarch64_vector_mode_supported_p (mode) + && (known_eq (GET_MODE_SIZE (mode), 8) + || (known_eq (GET_MODE_SIZE (mode), 16) +@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info, + vec_flags &= ~VEC_PARTIAL; + + /* On BE, we use load/store pair for all large int mode load/stores. +- TI/TFmode may also use a load/store pair. */ ++ TI/TF/TDmode may also use a load/store pair. */ + bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); + bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP + || type == ADDR_QUERY_LDP_STP_N + || mode == TImode + || mode == TFmode ++ || mode == TDmode + || (BYTES_BIG_ENDIAN && advsimd_struct_p)); + /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode + corresponds to the actual size of the memory being loaded/stored and the +@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = op1; + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset +@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + When performing the check for pairs of X registers i.e. LDP/STP + pass down DImode since that is the natural size of the LDP/STP + instruction memory accesses. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) + && (aarch64_offset_9bit_signed_unscaled_p (mode, offset) + || offset_12bit_unsigned_scaled_p (mode, offset))); +@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = XEXP (XEXP (x, 1), 1); + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset + Q: 9-bit signed offset + We conservatively require an offset representable in either mode. 
+ */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (mode, offset) + && aarch64_offset_9bit_signed_unscaled_p (mode, offset)); + +@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2, + offset. Use 4KB range for 1- and 2-byte accesses and a 16KB + range otherwise to increase opportunities for sharing the base + address of different sizes. Unaligned accesses use the signed +- 9-bit range, TImode/TFmode use the intersection of signed ++ 9-bit range, TImode/TFmode/TDmode use the intersection of signed + scaled 7-bit and signed 9-bit offset. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100; + else if ((const_offset & (size - 1)) != 0) + second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100; +@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + +- if (mode == DFmode) ++ if (mode == DFmode || mode == DDmode) + { + int order = BYTES_BIG_ENDIAN ? 1 : 0; + ival = zext_hwi (resorder, 32); +@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x) + return false; + } + +-/* Return TRUE if rtx X is immediate constant 0.0 */ ++/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal ++ Floating Point). */ + bool + aarch64_float_const_zero_rtx_p (rtx x) + { +- if (GET_MODE (x) == VOIDmode) ++ /* 0.0 in Decimal Floating Point cannot be represented by #0 or ++ zr as our callers expect, so no need to check the actual ++ value if X is of Decimal Floating Point type. */ ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT) + return false; + + if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x))) +@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) + else + return false; + +- /* use a 64 bit mode for everything except for DI/DF mode, where we use ++ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64; + +@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size, + if (IN_RANGE (offset, -256, 0)) + return 0; + +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (offset + 0x100) & ~0x1ff; + + /* Use 12-bit offset by access size. */ +@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
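As background, the SD/DD/TD modes wired up by this patch are the machine modes of the C decimal floating types. A small sketch of what such code looks like (assumes a toolchain built with --enable-decimal-float; the function and variable names are illustrative):

  /* The three DFP scalar modes handled by the patch map to C types:
     SDmode = _Decimal32, DDmode = _Decimal64, TDmode = _Decimal128.  */
  _Decimal64
  add_tax (_Decimal64 price)
  {
    /* 0.05DD is exact in decimal, unlike the binary double 0.05,
       which is the usual reason to reach for DFP.  */
    return price + price * 0.05DD;
  }

  _Decimal128
  widen (_Decimal32 x)
  {
    return x;   /* SDmode -> TDmode conversion */
  }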
View file
_service:tar_scm:0156-LoongArch-Add-descriptions-of-the-compilation-option.patch
Added
@@ -0,0 +1,83 @@
+From 899f1f351ddc0d76bc9d432cfe63b30cfb294860 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 25 Oct 2024 06:22:11 +0000
+Subject: [PATCH 156/188] LoongArch: Add descriptions of the compilation
+ options.
+
+Add descriptions for the compilation options '-mfrecipe' '-mdiv32'
+'-mlam-bh' '-mlamcas' and '-mld-seq-sa'.
+
+gcc/ChangeLog:
+
+	* doc/invoke.texi: Add descriptions for the compilation
+	options.
+---
+ gcc/doc/invoke.texi | 45 +++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 5c6515cb1..7f24fe1e2 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1008,8 +1008,9 @@ Objective-C and Objective-C++ Dialects}.
+ -mmax-inline-memcpy-size=@var{n} @gol
+ -mexplicit-relocs -mno-explicit-relocs @gol
+ -mdirect-extern-access -mno-direct-extern-access @gol
+--mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} @gol
+--mrecip -mrecip=@var{opt}
++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol
++-mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol
++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa}
+
+ @emph{M32R/D Options}
+ @gccoptlist{-m32r2 -m32rx -m32r @gol
+@@ -24686,6 +24687,46 @@ Enable the approximation for vectorized reciprocal square root.
+ So, for example, @option{-mrecip=all,!sqrt} enables
+ all of the reciprocal approximations, except for scalar square root.
+
++@opindex mfrecipe
++@opindex mno-frecipe
++@item -mfrecipe
++@itemx -mno-frecipe
++Use (do not use) @code{frecipe.@{s/d@}} and @code{frsqrte.@{s/d@}}
++instructions.  When built with @option{-march=la664}, it is enabled by default.
++The default is @option{-mno-frecipe}.
++
++@opindex mdiv32
++@opindex mno-div32
++@item -mdiv32
++@itemx -mno-div32
++Use (do not use) @code{div.wu} and @code{mod.wu} instructions with input
++not sign-extended.  When built with @option{-march=la664}, it is enabled by
++default.  The default is @option{-mno-div32}.
++
++@opindex mlam-bh
++@opindex mno-lam-bh
++@item -mlam-bh
++@itemx -mno-lam-bh
++Use (do not use) @code{am@{swap/add@}_db.@{b/h@}} instructions.  When built
++with @option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lam-bh}.
++
++@opindex mlamcas
++@opindex mno-lamcas
++@item -mlamcas
++@itemx -mno-lamcas
++Use (do not use) @code{amcas_db.@{b/h/w/d@}} instructions.  When built with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lamcas}.
++
++@opindex mld-seq-sa
++@opindex mno-ld-seq-sa
++@item -mld-seq-sa
++@itemx -mno-ld-seq-sa
++Whether a load-load barrier (@code{dbar 0x700}) is needed.  When built with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-ld-seq-sa}, i.e. the load-load barrier is needed.
++
+ @item loongarch-vect-unroll-limit
+ The vectorizer will use available tuning information to determine whether it
+ would be beneficial to unroll the main vectorized loop and by how much.  This
+--
+2.43.0
+
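In practice the documented options compose like this (foo.c is a placeholder file name; per the texi text above, -march=la664 turns all five options on by default):

  gcc -O2 -march=la464 -mfrecipe -mrecip=all foo.c    # opt in explicitly
  gcc -O2 -march=la664 foo.c                          # la664 default: all five enabled
  gcc -O2 -march=la664 -mno-ld-seq-sa foo.c           # opt back out of one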
View file
_service:tar_scm:0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
Added
@@ -0,0 +1,1824 @@ +From 737d2a5f1c5e725b7e5a20075270016ebf56b44c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 13 Sep 2022 09:28:49 +0100 +Subject: PATCH 058/157 BackportSME aarch64: Vector move fixes for + +nosimd + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=721c0fb3aca31d3bf8ad6e929eab32e29a427e60 + +This patch fixes various issues around the handling of vectors +and (particularly) vector structures with +nosimd. Previously, +passing and returning structures would trigger an ICE, since: + +* we didn't allow the structure modes to be stored in FPRs + +* we didn't provide +nosimd move patterns + +* splitting the moves into word-sized pieces (the default + strategy without move patterns) doesn't work because the + registers are doubleword sized. + +The patch is a bit of a hodge-podge since a lot of the handling of +moves, register costs, and register legitimacy is so interconnected. +It didn't seem feasible to split things further. + +Some notes: + +* The patch recognises vector and tuple modes based on TARGET_FLOAT + rather than TARGET_SIMD, and instead adds TARGET_SIMD to places + that really do need the vector ISA. This is necessary for the + modes to be handled correctly in register arguments and returns. + +* The 64-bit (DREG) STP peephole required TARGET_SIMD but the + LDP peephole didn't. I think the LDP one is right, since + DREG moves could involve GPRs as well as FPRs. + +* The patch keeps the existing choices of instructions for + TARGET_SIMD, just in case they happen to be better than FMOV + on some uarches. + +* Before the patch, +nosimd Q<->Q moves of 128-bit scalars went via + a GPR, thanks to a secondary reload pattern. This approach might + not be ideal, but there's no reason that 128-bit vectors should + behave differently from 128-bit scalars. The patch therefore + extends the current scalar approach to vectors. + +* Multi-vector LD1 and ST1 require TARGET_SIMD, so the TARGET_FLOAT + structure moves need to use LDP/STP and LDR/STR combinations + instead. That's also what we do for big-endian even with + TARGET_SIMD, so most of the code was already there. The patterns + for structures of 64-bit vectors are identical, but the patterns + for structures of 128-bit vectors need to cope with the lack of + 128-bit Q<->Q moves. + + It isn't feasible to move multi-vector tuples via GPRs, so the + patch moves them via memory instead. This contaminates the port + with its first secondary memory reload. + +gcc/ + + * config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Use + TARGET_FLOAT instead of TARGET_SIMD. + (aarch64_vectorize_related_mode): Restrict ADVSIMD handling to + TARGET_SIMD. + (aarch64_hard_regno_mode_ok): Don't allow tuples of 2 64-bit vectors + in GPRs. + (aarch64_classify_address): Treat little-endian structure moves + like big-endian for TARGET_FLOAT && !TARGET_SIMD. + (aarch64_secondary_memory_needed): New function. + (aarch64_secondary_reload): Handle 128-bit Advanced SIMD vectors + in the same way as TF, TI and TD. + (aarch64_rtx_mult_cost): Restrict ADVSIMD handling to TARGET_SIMD. + (aarch64_rtx_costs): Likewise. + (aarch64_register_move_cost): Treat a pair of 64-bit vectors + separately from a single 128-bit vector. Handle the cost implied + by aarch64_secondary_memory_needed. + (aarch64_simd_valid_immediate): Restrict ADVSIMD handling to + TARGET_SIMD. + (aarch64_expand_vec_perm_const_1): Likewise. + (TARGET_SECONDARY_MEMORY_NEEDED): New macro. 
+ * config/aarch64/iterators.md (VTX): New iterator. + * config/aarch64/aarch64.md (arches): Add fp_q as a synonym of simd. + (arch_enabled): Adjust accordingly. + (@aarch64_reload_mov<TX:mode>): Extend to... + (@aarch64_reload_mov<VTX:mode>): ...this. + * config/aarch64/aarch64-simd.md (mov<mode>): Require TARGET_FLOAT + rather than TARGET_SIMD. + (movmisalign<mode>): Likewise. + (load_pair<DREG:mode><DREG2:mode>): Likewise. + (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. + (load_pair<VQ:mode><VQ2:mode>): Likewise. + (vec_store_pair<VQ:mode><VQ2:mode>): Likewise. + (@aarch64_split_simd_mov<mode>): Likewise. + (aarch64_get_low<mode>): Likewise. + (aarch64_get_high<mode>): Likewise. + (aarch64_get_half<mode>): Likewise. Canonicalize to a move for + lowpart extracts. + (*aarch64_simd_mov<VDMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Use different w<-w and r<-w instructions for + !TARGET_SIMD. Disable immediate moves for !TARGET_SIMD but + add an alternative specifically for w<-Z. + (*aarch64_simd_mov<VQMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Likewise for the associated define_splits. Disable + FPR moves and immediate moves for !TARGET_SIMD but add an alternative + specifically for w<-Z. + (aarch64_simd_mov_from_<mode>high): Require TARGET_FLOAT rather than + TARGET_SIMD. Restrict the existing alternatives to TARGET_SIMD + but add a new r<-w one for !TARGET_SIMD. + (*aarch64_get_high<mode>): New pattern. + (load_pair_lanes<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + (store_pair_lanes<mode>): Likewise. + (*aarch64_combine_internal<mode>): Likewise. Restrict existing + w<-w, w<-r and w<-m alternatives to TARGET_SIMD but add a new w<-r + alternative for !TARGET_SIMD. + (*aarch64_combine_internal_be<mode>): Likewise. + (aarch64_combinez<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + Remove bogus arch attribute. + (*aarch64_combinez_be<mode>): Likewise. + (@aarch64_vec_concat<mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_combine<mode>): Likewise. + (aarch64_rev_reglist<mode>): Likewise. + (mov<mode>): Likewise. + (*aarch64_be_mov<VSTRUCT_2D:mode>): Extend to TARGET_FLOAT && + !TARGET_SIMD, regardless of endianness. Extend associated + define_splits in the same way, both for this pattern and the + ones below. + (*aarch64_be_mov<VSTRUCT_2Qmode>): Likewise. Restrict w<-w + alternative to TARGET_SIMD. + (*aarch64_be_movoi): Likewise. + (*aarch64_be_movci): Likewise. + (*aarch64_be_movxi): Likewise. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Extend to TARGET_FLOAT + && !TARGET_SIMD, regardless of endianness. Restrict w<-w alternative + to TARGET_SIMD for tuples of 128-bit vectors. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Likewise. + * config/aarch64/aarch64-ldpstp.md: Remove TARGET_SIMD condition + from DREG STP peephole. Change TARGET_SIMD to TARGET_FLOAT in + the VQ and VP_2E LDP and STP peepholes. + +gcc/testsuite/ + * gcc.target/aarch64/ldp_stp_20.c: New test. + * gcc.target/aarch64/ldp_stp_21.c: Likewise. + * gcc.target/aarch64/ldp_stp_22.c: Likewise. + * gcc.target/aarch64/ldp_stp_23.c: Likewise. + * gcc.target/aarch64/ldp_stp_24.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c (gpr_to_gpr): New function. + * gcc.target/aarch64/movv8qi_1.c (gpr_to_gpr): Likewise. + * gcc.target/aarch64/movv16qi_2.c: New test. + * gcc.target/aarch64/movv16qi_3.c: Likewise. + * gcc.target/aarch64/movv2di_1.c: Likewise. + * gcc.target/aarch64/movv2x16qi_1.c: Likewise. + * gcc.target/aarch64/movv2x8qi_1.c: Likewise. 
+ * gcc.target/aarch64/movv3x16qi_1.c: Likewise. + * gcc.target/aarch64/movv3x8qi_1.c: Likewise. + * gcc.target/aarch64/movv4x16qi_1.c: Likewise. + * gcc.target/aarch64/movv4x8qi_1.c: Likewise. + * gcc.target/aarch64/movv8qi_2.c: Likewise. + * gcc.target/aarch64/movv8qi_3.c: Likewise. + * gcc.target/aarch64/vect_unary_2.c: Likewise. +--- + gcc/config/aarch64/aarch64-ldpstp.md | 11 +- + gcc/config/aarch64/aarch64-simd.md | 199 +++++++++++------- + gcc/config/aarch64/aarch64.cc | 94 ++++++--- + gcc/config/aarch64/aarch64.md | 11 +- + gcc/config/aarch64/iterators.md | 2 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c | 13 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 21 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv16qi_3.c | 30 +++ + gcc/testsuite/gcc.target/aarch64/movv2di_1.c | 103 +++++++++ + .../gcc.target/aarch64/movv2x16qi_1.c | 40 ++++ + .../gcc.target/aarch64/movv2x8qi_1.c | 38 ++++ + .../gcc.target/aarch64/movv3x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv3x8qi_1.c | 41 ++++ + .../gcc.target/aarch64/movv4x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv4x8qi_1.c | 42 ++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 15 ++ + gcc/testsuite/gcc.target/aarch64/movv8qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv8qi_3.c | 30 +++ + .../gcc.target/aarch64/vect_unary_2.c | 5 + + 24 files changed, 774 insertions(+), 109 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2di_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
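Since the diff body above is truncated, here is a hypothetical reproducer in the spirit of the new movv16qi/movv2x16qi tests, compiled with something like -march=armv8-a+nosimd. The function and struct names are illustrative only; the committed tests use the ACLE vector-tuple types rather than a plain struct, so treat this as a sketch of the problem, not the testsuite source.

/* A 128-bit generic vector: with this patch it is still passed and
   returned in an FPR, but moved with FMOV/GPR-pair/memory sequences
   instead of Advanced SIMD instructions.  */
typedef unsigned char v16qi __attribute__ ((vector_size (16)));

v16qi
copy_one (v16qi v)
{
  return v;
}

/* Structures of vectors were the ICE case described above: their
   modes were not allowed in FPRs and had no +nosimd move patterns,
   and the default word-sized splitting cannot cope with doubleword
   registers.  Multi-vector LD1/ST1 need TARGET_SIMD, so the new
   patterns fall back to LDP/STP and LDR/STR combinations.  */
struct two_vectors { v16qi a, b; };

struct two_vectors
copy_two (struct two_vectors v)
{
  return v;
}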
View file
_service:tar_scm:0157-LoongArch-Split-loongarch_option_override_internal-i.patch
Added
@@ -0,0 +1,800 @@ +From 6dd3434f004dd1481a3d18fb416b3ddd4151b10f Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Sat, 30 Mar 2024 16:43:14 +0800 +Subject: PATCH 157/188 LoongArch: Split loongarch_option_override_internal + into smaller procedures + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-recip as + aliases to -mrecip={all,none}, respectively. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h (ABI_FPU_64): Rename to... + (ABI_FPU64_P): ...this. + (ABI_FPU_32): Rename to... + (ABI_FPU32_P): ...this. + (ABI_FPU_NONE): Rename to... + (ABI_NOFPU_P): ...this. + (ABI_LP64_P): Define. + * config/loongarch/loongarch.cc (loongarch_init_print_operand_punct): + Merged into loongarch_global_init. + (loongarch_cpu_option_override): Renamed to + loongarch_target_option_override. + (loongarch_option_override_internal): Move the work after + loongarch_config_target into loongarch_target_option_override. + (loongarch_global_init): Define. + (INIT_TARGET_FLAG): Move to loongarch-opts.cc. + (loongarch_option_override): Call loongarch_global_init + separately. + * config/loongarch/loongarch-opts.cc (loongarch_parse_mrecip_scheme): + Split the parsing of -mrecip=<string> from + loongarch_option_override_internal. + (loongarch_generate_mrecip_scheme): Define. Split from + loongarch_option_override_internal. + (loongarch_target_option_override): Define. Renamed from + loongarch_cpu_option_override. + (loongarch_init_misc_options): Define. Split from + loongarch_option_override_internal. + (INIT_TARGET_FLAG): Move from loongarch.cc. + * config/loongarch/loongarch-opts.h (loongarch_target_option_override): + New prototype. + (loongarch_parse_mrecip_scheme): New prototype. + (loongarch_init_misc_options): New prototype. + (TARGET_ABI_LP64): Simplify with ABI_LP64_P. + * config/loongarch/loongarch.h (TARGET_RECIP_DIV): Simplify. + Do not reference specific CPU architecture (LA664). + (TARGET_RECIP_SQRT): Same. + (TARGET_RECIP_RSQRT): Same. + (TARGET_RECIP_VEC_DIV): Same. + (TARGET_RECIP_VEC_SQRT): Same. + (TARGET_RECIP_VEC_RSQRT): Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 8 +- + gcc/config/loongarch/loongarch-def.h | 11 +- + gcc/config/loongarch/loongarch-opts.cc | 253 ++++++++++++++++++ + gcc/config/loongarch/loongarch-opts.h | 27 +- + gcc/config/loongarch/loongarch.cc | 253 +++--------------- + gcc/config/loongarch/loongarch.h | 18 +- + gcc/config/loongarch/loongarch.opt | 8 +- + 7 files changed, 342 insertions(+), 236 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 4d6b1902d..9c6f59bb8 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -197,14 +197,14 @@ mexplicit-relocs + Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + +-mrecip +-Target RejectNegative Var(la_recip) Save +-Generate approximate reciprocal divide and square root for better throughput. +- + mrecip= + Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + ++mrecip ++Target Alias(mrecip=, all, none) ++Generate approximate reciprocal divide and square root for better throughput. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index fdcf43fc7..b1423bcfe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -90,11 +90,16 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + + #define TO_LP64_ABI_BASE(C) (C) + +-#define ABI_FPU_64(abi_base) \ ++#define ABI_LP64_P(abi_base) \ ++ (abi_base == ABI_BASE_LP64D \ ++ || abi_base == ABI_BASE_LP64F \ ++ || abi_base == ABI_BASE_LP64S) ++ ++#define ABI_FPU64_P(abi_base) \ + (abi_base == ABI_BASE_LP64D) +-#define ABI_FPU_32(abi_base) \ ++#define ABI_FPU32_P(abi_base) \ + (abi_base == ABI_BASE_LP64F) +-#define ABI_FPU_NONE(abi_base) \ ++#define ABI_NOFPU_P(abi_base) \ + (abi_base == ABI_BASE_LP64S) + + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index bdecfaf49..404642a9e 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see + #include "coretypes.h" + #include "tm.h" + #include "obstack.h" ++#include "opts.h" + #include "diagnostic-core.h" + + #include "loongarch-cpu.h" +@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-str.h" + #include "loongarch-def.h" + ++/* Target configuration */ + struct loongarch_target la_target; + ++/* RTL cost information */ ++const struct loongarch_rtx_cost_data *loongarch_cost; ++ + /* ABI-related configuration. */ + #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) + static const struct loongarch_abi +@@ -795,3 +800,251 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } ++ ++/* -mrecip=<str> handling */ ++static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. */ ++ } ++const recip_options = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++}; ++ ++/* Parser for -mrecip=<recip_string>. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string) ++{ ++ unsigned int result_mask = RECIP_MASK_NONE; ++ ++ if (recip_string) ++ { ++ char *p = ASTRDUP (recip_string); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_optionsi.string)) ++ { ++ mask = recip_optionsi.mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ result_mask &= ~mask;
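The -mrecip= parser above is small enough to restate stand-alone. The sketch below uses illustrative R_* mask values rather than GCC's RECIP_MASK_* constants, but shows the same semantics: names are comma-separated, a leading '!' clears bits instead of setting them, and "default" acts like "all". Together with the alias change above, bare -mrecip now means -mrecip=all and -mno-recip means -mrecip=none.

#include <stdio.h>
#include <string.h>

enum { R_NONE = 0, R_DIV = 1, R_SQRT = 2, R_ALL = R_DIV | R_SQRT };

static unsigned int
parse_mrecip (char *s)
{
  unsigned int mask = R_NONE;
  for (char *q = strtok (s, ","); q != NULL; q = strtok (NULL, ","))
    {
      int invert = 0;
      if (*q == '!')
        {
          invert = 1;
          q++;
        }
      unsigned int m = R_NONE;   /* GCC errors out on unknown names.  */
      if (!strcmp (q, "all") || !strcmp (q, "default"))
        m = R_ALL;
      else if (!strcmp (q, "div"))
        m = R_DIV;
      else if (!strcmp (q, "sqrt"))
        m = R_SQRT;
      mask = invert ? (mask & ~m) : (mask | m);
    }
  return mask;
}

int
main (void)
{
  char opt[] = "all,!sqrt";
  printf ("%u\n", parse_mrecip (opt));   /* prints 1, i.e. R_DIV only */
  return 0;
}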
View file
_service:tar_scm:0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
Added
@@ -0,0 +1,213 @@ +From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Wed, 31 May 2023 11:23:23 +0100 +Subject: PATCH 059/157 BackportSME aarch64: Simplify output template + emission code for a few patterns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750 + +If the output code for a define_insn just does a switch (which_alternative) with no other computation we can almost always +replace it with more compact MD syntax for each alternative in a mult-alternative '@' block. +This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise. +No behavioural change intended. + +Bootstrapped and tested on aarch64-none-linux-gnu. + +gcc/ChangeLog: + + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Rewrite + output template to avoid explicit switch on which_alternative. + (*aarch64_simd_mov<VQMOV:mode>): Likewise. + (and<mode>3): Likewise. + (ior<mode>3): Likewise. + * config/aarch64/aarch64.md (*mov<mode>_aarch64): Likewise. +--- + gcc/config/aarch64/aarch64-simd.md | 97 +++++++++--------------------- + gcc/config/aarch64/aarch64.md | 42 ++++--------- + 2 files changed, 40 insertions(+), 99 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index ef7fc4ecb..2d688edf5 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -122,28 +122,16 @@ + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: return "ldr\t%d0, %1"; +- case 1: return "str\txzr, %0"; +- case 2: return "str\t%d1, %0"; +- case 3: +- if (TARGET_SIMD) +- return "mov\t%0.<Vbtype>, %1.<Vbtype>"; +- return "fmov\t%d0, %d1"; +- case 4: +- if (TARGET_SIMD) +- return "umov\t%0, %1.d0"; +- return "fmov\t%x0, %d1"; +- case 5: return "fmov\t%d0, %1"; +- case 6: return "mov\t%0, %1"; +- case 7: +- return aarch64_output_simd_mov_immediate (operands1, 64); +- case 8: return "fmov\t%d0, xzr"; +- default: gcc_unreachable (); +- } +-} ++ "@ ++ ldr\t%d0, %1 ++ str\txzr, %0 ++ str\t%d1, %0 ++ * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; ++ * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; ++ fmov\t%d0, %1 ++ mov\t%0, %1 ++ * return aarch64_output_simd_mov_immediate (operands1, 64); ++ fmov\t%d0, xzr" + (set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ + neon_logic<q>, neon_to_gp<q>, f_mcr,\ + mov_reg, neon_move<q>, f_mcr") +@@ -158,29 +146,16 @@ + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: +- return "ldr\t%q0, %1"; +- case 1: +- return "stp\txzr, xzr, %0"; +- case 2: +- return "str\t%q1, %0"; +- case 3: +- return "mov\t%0.<Vbtype>, %1.<Vbtype>"; +- case 4: +- case 5: +- case 6: +- return "#"; +- case 7: +- return aarch64_output_simd_mov_immediate (operands1, 128); +- case 8: +- return "fmov\t%d0, xzr"; +- default: +- gcc_unreachable (); +- } +-} ++ "@ ++ ldr\t%q0, %1 ++ stp\txzr, xzr, %0 ++ str\t%q1, %0 ++ mov\t%0.<Vbtype>, %1.<Vbtype> ++ # ++ # ++ # ++ * return aarch64_output_simd_mov_immediate (operands1, 128); ++ fmov\t%d0, xzr" + (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ + neon_logic<q>, multiple, multiple,\ + multiple, neon_move<q>, fmov") +@@ -1004,18 +979,10 @@ + (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db"))) + "TARGET_SIMD" +- { +- switch (which_alternative) +- { +- case 0: +- return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; +- case 1: +- return aarch64_output_simd_mov_immediate (operands2, <bitsize>, +- AARCH64_CHECK_BIC); +- default: +- gcc_unreachable (); +- } +- } ++ "@ ++ and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> ++ * return aarch64_output_simd_mov_immediate (operands2, <bitsize>,\ ++ AARCH64_CHECK_BIC);" + (set_attr "type" "neon_logic<q>") + ) + +@@ -1025,18 +992,10 @@ + (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do"))) + "TARGET_SIMD" +- { +- switch (which_alternative) +- { +- case 0: +- return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; +- case 1: +- return aarch64_output_simd_mov_immediate (operands2, <bitsize>, +- AARCH64_CHECK_ORR); +- default: +- gcc_unreachable (); +- } +- } ++ "@ ++ orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> ++ * return aarch64_output_simd_mov_immediate (operands2, <bitsize>,\ ++ AARCH64_CHECK_ORR);" + (set_attr "type" "neon_logic<q>") + ) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index c0cc91756..7454a5c77 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1198,36 +1198,18 @@ + (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" +-{ +- switch (which_alternative) +- { +- case 0: +- return "mov\t%w0, %w1"; +- case 1: +- return "mov\t%w0, %1"; +- case 2: +- return aarch64_output_scalar_simd_mov_immediate (operands1, +- <MODE>mode); +- case 3: +- return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- case 4: +- return "ldr<size>\t%w0, %1"; +- case 5: +- return "ldr\t%<size>0, %1"; +- case 6: +- return "str<size>\t%w1, %0"; +- case 7: +- return "str\t%<size>1, %0"; +- case 8: +- return TARGET_SIMD ? "umov\t%w0, %1.<v>0" : "fmov\t%w0, %s1"; +- case 9: +- return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1"; +- case 10: +- return TARGET_SIMD ? 
"dup\t%<Vetype>0, %1.<v>0" : "fmov\t%s0, %s1"; +- default: +- gcc_unreachable (); +- } +-} ++ "@ ++ mov\t%w0, %w1 ++ mov\t%w0, %1 ++ * return aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode);
View file
_service:tar_scm:0158-LoongArch-Regenerate-loongarch.opt.urls.patch
Added
@@ -0,0 +1,117 @@ +From 90a0f195830a25e4179127c67e873c80f758f29d Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 25 Oct 2024 06:25:39 +0000 +Subject: [PATCH 158/188] LoongArch: Regenerate loongarch.opt.urls. + +Fixes: d28ea8e5a704 ("LoongArch: Split loongarch_option_override_internal + into smaller procedures") + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt.urls: Regenerate. +--- + gcc/config/loongarch/loongarch.opt.urls | 92 +++++++++++++++++++++++++ + 1 file changed, 92 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch.opt.urls + +diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls +new file mode 100644 +index 000000000..571c504e6 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch.opt.urls +@@ -0,0 +1,92 @@ ++; Autogenerated by regenerate-opt-urls.py from gcc/config/loongarch/loongarch.opt and generated HTML ++ ++mfpu= ++UrlSuffix(gcc/LoongArch-Options.html#index-mfpu-2) ++ ++msoft-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-5) ++ ++msingle-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msingle-float) ++ ++mdouble-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mdouble-float-1) ++ ++msimd= ++UrlSuffix(gcc/LoongArch-Options.html#index-msimd-1) ++ ++march= ++UrlSuffix(gcc/LoongArch-Options.html#index-march-7) ++ ++mtune= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtune-8) ++ ++mabi= ++UrlSuffix(gcc/LoongArch-Options.html#index-mabi-2) ++ ++mbranch-cost= ++UrlSuffix(gcc/LoongArch-Options.html#index-mbranch-cost-2) ++ ++mcheck-zero-division ++UrlSuffix(gcc/LoongArch-Options.html#index-mcheck-zero-division) ++ ++mcond-move-int ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-int) ++ ++mcond-move-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-float) ++ ++mmemcpy ++UrlSuffix(gcc/LoongArch-Options.html#index-mmemcpy) ++ ++mstrict-align ++UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1) ++ ++mmax-inline-memcpy-size= ++UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size) ++ ++mexplicit-relocs= ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mexplicit-relocs ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mrecip= ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mrecip ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mcmodel= ++UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1) ++ ++mdirect-extern-access ++UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access) ++ ++mrelax ++UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2) ++ ++mpass-mrelax-to-as ++UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as) ++ ++mtls-dialect= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1) ++ ++mannotate-tablejump ++UrlSuffix(gcc/LoongArch-Options.html#index-mannotate-tablejump) ++ ++mfrecipe ++UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe) ++ ++mdiv32 ++UrlSuffix(gcc/LoongArch-Options.html#index-mdiv32) ++ ++mlam-bh ++UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh) ++ ++mlamcas ++UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas) ++ ++mld-seq-sa ++UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa) ++ +-- +2.43.0 +
View file
_service:tar_scm:0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
Added
@@ -0,0 +1,631 @@ +From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wdijkstr@arm.com> +Date: Mon, 24 Oct 2022 15:14:14 +0100 +Subject: PATCH 060/157 BackportSME Improve immediate expansion + PR106583 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d + +Improve immediate expansion of immediates which can be created from a +bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency +of bitmask checks. Move various immediate handling functions together +to avoid forward declarations. + +This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%. + +gcc/ + + PR target/106583 + * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) + Add support for a bitmask immediate with 2 MOVKs. + (aarch64_check_bitmask): New function after refactorization. + (aarch64_bitmask_imm): Simplify replication of small modes. + Split function into 64-bit only version for efficiency. + (aarch64_move_imm): Move near other immediate functions. + (aarch64_uimm12_shift): Likewise. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movk_shift): Likewise. + (aarch64_replicate_bitmask_imm): Likewise. + (aarch64_and_split_imm1): Likewise. + (aarch64_and_split_imm2): Likewise. + (aarch64_and_bitmask_imm): Likewise. + (aarch64_movw_imm): Likewise. + +gcc/testsuite/ + PR target/106583 + * gcc.target/aarch64/pr106583.c: Add new test. +--- + gcc/config/aarch64/aarch64.cc | 485 +++++++++++--------- + gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++ + 2 files changed, 301 insertions(+), 225 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b4b646fa0..cf7736994 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, + static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); + static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); +-static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; +@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + factor, nelts_per_vq); + } + ++/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ ++ ++static const unsigned HOST_WIDE_INT bitmask_imm_mul = ++ { ++ 0x0000000100000001ull, ++ 0x0001000100010001ull, ++ 0x0101010101010101ull, ++ 0x1111111111111111ull, ++ 0x5555555555555555ull, ++ }; ++ ++ ++ ++/* Return true if 64-bit VAL is a valid bitmask immediate. */ ++static bool ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) ++{ ++ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one; ++ int bits; ++ ++ /* Check for a single sequence of one bits and return quickly if so. ++ The special cases of all ones and all zeroes returns false. */ ++ tmp = val + (val & -val); ++ ++ if (tmp == (tmp & -tmp)) ++ return (val + 1) > 1; ++ ++ /* Invert if the immediate doesn't start with a zero bit - this means we ++ only need to search for sequences of one bits. */ ++ if (val & 1) ++ val = ~val; ++ ++ /* Find the first set bit and set tmp to val with the first sequence of one ++ bits removed. Return success if there is a single sequence of ones. 
*/ ++ first_one = val & -val; ++ tmp = val & (val + first_one); ++ ++ if (tmp == 0) ++ return true; ++ ++ /* Find the next set bit and compute the difference in bit position. */ ++ next_one = tmp & -tmp; ++ bits = clz_hwi (first_one) - clz_hwi (next_one); ++ mask = val ^ tmp; ++ ++ /* Check the bit position difference is a power of 2, and that the first ++ sequence of one bits fits within 'bits' bits. */ ++ if ((mask >> bits) != 0 || bits != (bits & -bits)) ++ return false; ++ ++ /* Check the sequence of one bits is repeated 64/bits times. */ ++ return val == mask * bitmask_imm_mul__builtin_clz (bits) - 26; ++} ++ ++ ++/* Return true if VAL is a valid bitmask immediate for MODE. */ ++bool ++aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++{ ++ if (mode == DImode) ++ return aarch64_bitmask_imm (val_in); ++ ++ unsigned HOST_WIDE_INT val = val_in; ++ ++ if (mode == SImode) ++ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); ++ ++ /* Replicate small immediates to fit 64 bits. */ ++ int size = GET_MODE_UNIT_PRECISION (mode); ++ val &= (HOST_WIDE_INT_1U << size) - 1; ++ val *= bitmask_imm_mul__builtin_clz (size) - 26; ++ ++ return aarch64_bitmask_imm (val); ++} ++ ++ ++/* Return true if the immediate VAL can be a bitfield immediate ++ by changing the given MASK bits in VAL to zeroes, ones or bits ++ from the other half of VAL. Return the new immediate in VAL2. */ ++static inline bool ++aarch64_check_bitmask (unsigned HOST_WIDE_INT val, ++ unsigned HOST_WIDE_INT &val2, ++ unsigned HOST_WIDE_INT mask) ++{ ++ val2 = val & ~mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val = val & ~mask; ++ val2 = val | (((val >> 32) | (val << 32)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | (((val >> 16) | (val << 48)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ return false; ++} ++ ++ ++/* Return true if val is an immediate that can be loaded into a ++ register by a MOVZ instruction. */ ++static bool ++aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++{ ++ if (GET_MODE_SIZE (mode) > 4) ++ { ++ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) ++ return 1; ++ } ++ else ++ { ++ /* Ignore sign extension. */ ++ val &= (HOST_WIDE_INT) 0xffffffff; ++ } ++ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++} ++ ++ ++/* Return true if VAL is an immediate that can be loaded into a ++ register in a single instruction. */ ++bool ++aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++{ ++ scalar_int_mode int_mode; ++ if (!is_a <scalar_int_mode> (mode, &int_mode)) ++ return false; ++ ++ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) ++ return 1; ++ return aarch64_bitmask_imm (val, int_mode); ++} ++ ++ + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + scalar_int_mode mode) +@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
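The payoff is easiest to see on a concrete value. Below is a minimal stand-alone checker for AArch64 logical ("bitmask") immediates, written independently of the refactored aarch64_bitmask_imm above — same idea, slower but simpler — together with an example immediate of the kind this patch now builds in three instructions instead of four. The values and the sketched instruction sequence are illustrative, not a verified codegen dump.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if X is a non-empty, non-wrapping run of consecutive ones.  */
static bool
run_of_ones (uint64_t x)
{
  return x != 0 && (x & (x + (x & -x))) == 0;
}

/* True if VAL is a valid AArch64 bitmask immediate: a rotated run of
   ones replicated across the register at some power-of-two element
   size.  */
static bool
bitmask_imm (uint64_t val)
{
  if (val == 0 || val == ~(uint64_t) 0)
    return false;
  for (unsigned int e = 2; e <= 64; e *= 2)
    {
      uint64_t mask = e == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << e) - 1;
      uint64_t elt = val & mask;
      bool replicated = true;
      for (unsigned int i = e; i < 64; i += e)
        if (((val >> i) & mask) != elt)
          replicated = false;
      /* A wrapping run of ones has a non-wrapping run as complement.  */
      if (replicated && elt != mask
          && (run_of_ones (elt) || run_of_ones (~elt & mask)))
        return true;
    }
  return false;
}

int
main (void)
{
  /* 0b01 replicated at element size 2: a valid bitmask immediate.  */
  printf ("%d\n", bitmask_imm (0x5555555555555555ull));   /* 1 */
  /* Not a bitmask immediate itself, but buildable after this patch as
     a bitmask MOV plus two MOVKs (illustrative sequence):
       mov  x0, #0x5555555555555555
       movk x0, #0x6789
       movk x0, #0x1234, lsl #48  */
  printf ("%d\n", bitmask_imm (0x1234555555556789ull));   /* 0 */
  return 0;
}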
View file
_service:tar_scm:0159-LoongArch-Add-support-for-TLS-descriptors.patch
Added
@@ -0,0 +1,724 @@ +From 0d5ff38a94dbd655bc86e0be262458ac71726ea4 Mon Sep 17 00:00:00 2001 +From: mengqinggang <mengqinggang@loongson.cn> +Date: Tue, 2 Apr 2024 09:57:20 +0800 +Subject: PATCH 159/188 LoongArch: Add support for TLS descriptors. + +Add support for TLS descriptors on normal code model and extreme +code model. + +Normal code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r4,$r4,%desc_pc_lo12(s) + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +Extreme code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, $r12, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r12,$r0,%desc_pc_lo12(s) + lu32i.d $r12,%desc64_pc_lo20(s) + lu52i.d $r12,$r12,%desc64_pc_hi12(s) + add.d $r4,$r4,$r12 + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +The default is still traditional TLS model, but can be configured with +--with-tls={trad,desc}. The default can change to TLS descriptors once +libc and LLVM support this. + +gcc/ChangeLog: + + * config.gcc: Add --with-tls option to change TLS flavor. + * config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to + configure TLS flavor. + * config/loongarch/loongarch-def.h (struct loongarch_target): Add + tls_dialect. + * config/loongarch/loongarch-driver.cc (la_driver_init): Add tls + flavor. + * config/loongarch/loongarch-opts.cc (loongarch_init_target): Add + tls_dialect. + (loongarch_config_target): Ditto. + (loongarch_update_gcc_opt_status): Ditto. + * config/loongarch/loongarch-opts.h (loongarch_init_target): Ditto. + (TARGET_TLS_DESC): New define. + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Add TLS + DESC instructions sequence length. + (loongarch_legitimize_tls_address): New TLS DESC instruction sequence. + (loongarch_option_override_internal): Add la_opt_tls_dialect. + (loongarch_option_restore): Add la_target.tls_dialect. + * config/loongarch/loongarch.md (@got_load_tls_desc<mode>): Normal + code model for TLS DESC. + (got_load_tls_desc_off64): Extreme cmode model for TLS DESC. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Ditto. + * doc/invoke.texi: Add a description of the compilation option + '-mtls-dialect={trad,desc}'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/cmodel-extreme-1.c: Add -mtls-dialect=trad. + * gcc.target/loongarch/cmodel-extreme-2.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Ditto. + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: + Ditto. + * gcc.target/loongarch/func-call-medium-1.c: Ditto. + * gcc.target/loongarch/func-call-medium-2.c: Ditto. + * gcc.target/loongarch/func-call-medium-3.c: Ditto. + * gcc.target/loongarch/func-call-medium-4.c: Ditto. + * gcc.target/loongarch/tls-extreme-macro.c: Ditto. + * gcc.target/loongarch/tls-gd-noplt.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-auto-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: New test. 
+ +Co-authored-by: Lulu Cheng <chenglulu@loongson.cn> +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.gcc | 19 +++++- + gcc/config/loongarch/genopts/loongarch.opt.in | 14 ++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-driver.cc | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 12 +++- + gcc/config/loongarch/loongarch-opts.h | 3 + + gcc/config/loongarch/loongarch.cc | 45 ++++++++---- + gcc/config/loongarch/loongarch.md | 68 +++++++++++++++++++ + gcc/config/loongarch/loongarch.opt | 14 ++++ + gcc/doc/invoke.texi | 16 ++++- + .../gcc.target/loongarch/cmodel-extreme-1.c | 2 +- + .../gcc.target/loongarch/cmodel-extreme-2.c | 2 +- + .../explicit-relocs-auto-extreme-tls-desc.c | 10 +++ + .../loongarch/explicit-relocs-auto-tls-desc.c | 10 +++ + .../explicit-relocs-auto-tls-ld-gd.c | 2 +- + .../explicit-relocs-extreme-tls-desc.c | 16 +++++ + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 2 +- + .../loongarch/explicit-relocs-tls-desc.c | 13 ++++ + .../gcc.target/loongarch/func-call-medium-1.c | 2 +- + .../gcc.target/loongarch/func-call-medium-2.c | 2 +- + .../gcc.target/loongarch/func-call-medium-3.c | 2 +- + .../gcc.target/loongarch/func-call-medium-4.c | 2 +- + .../gcc.target/loongarch/tls-extreme-macro.c | 2 +- + .../gcc.target/loongarch/tls-gd-noplt.c | 2 +- + 24 files changed, 243 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 499b36b45..1db558d4c 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4982,7 +4982,7 @@ case "${target}" in + ;; + + loongarch*-*) +- supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib" ++ supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls" + + # Local variables + unset \ +@@ -5240,6 +5240,18 @@ case "${target}" in + with_multilib_list="${abi_base}/${abi_ext}" + fi + ++ # Handle --with-tls. ++ case "$with_tls" in ++ "" \ ++ | trad | desc) ++ # OK ++ ;; ++ *) ++ echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2 ++ exit 1 ++ ;; ++ esac ++ + # Check if the configured default ABI combination is included in + # ${with_multilib_list}. + loongarch_multilib_list_sane=no +@@ -5875,6 +5887,11 @@ case ${target} in + lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; + esac + ++ case ${with_tls} in ++ "" | trad) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_TRADITIONAL" ;; ++ desc) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_DESCRIPTORS" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 9c6f59bb8..f3d53f03c 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -245,6 +245,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. 
+ ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index b1423bcfe..2fe44da5a 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -180,6 +180,7 @@ struct loongarch_target + int cpu_arch; /* CPU_ */ + int cpu_tune; /* same */ + int cmodel; /* CMODEL_ */ ++ int tls_dialect; /* TLS_ */ + }; + + /* CPU model */ +@@ -193,6 +194,12 @@ enum { + N_TUNE_TYPES = 5
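As a usage sketch (the target triplet and exact flags are illustrative), any TLS access is enough to observe the two dialects:

/* tls.c -- compile as, e.g.:
     loongarch64-linux-gnu-gcc -O2 -fPIC -mtls-dialect=desc -S tls.c
     loongarch64-linux-gnu-gcc -O2 -fPIC -mtls-dialect=trad -S tls.c
   With =desc and -mexplicit-relocs, the access to "counter" uses the
   %desc_pc_hi20/%desc_ld/%desc_call sequence from the commit message;
   with =trad it uses the traditional GD/LD relocations.  A toolchain
   defaults to descriptors only if GCC was configured with
   --with-tls=desc.  */
__thread int counter;

int
bump (void)
{
  return ++counter;
}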
View file
_service:tar_scm:0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
Added
@@ -0,0 +1,410 @@ +From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Mon, 5 Dec 2022 10:49:25 +0000 +Subject: PATCH 061/157 BackportSME AArch64: Cleanup move immediate code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1 + +Simplify, refactor and improve various move immediate functions. +Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special +cases in aarch64_internal_mov_immediate. Add new constraint so the movdi +pattern only needs a single alternative for move immediate. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type. + (aarch64_is_mov_xn_imm): New function. + (aarch64_move_imm): Refactor, assert mode is SImode or DImode. + (aarch64_internal_mov_immediate): Assert mode is SImode or DImode. + Simplify special cases. + (aarch64_uimm12_shift): Simplify code. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movw_imm): Rename to aarch64_is_movz. + (aarch64_float_const_rtx_p): Pass either SImode or DImode to + aarch64_internal_mov_immediate. + (aarch64_rtx_costs): Likewise. + * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M' + constraints into single 'O'. + (mov<mode>_aarch64): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned. + (aarch64_bitmask_imm): Likewise. + (aarch64_uimm12_shift): Likewise. + (aarch64_is_mov_xn_imm): New prototype. + * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates, + limit 'N' to 64-bit only moves. +--- + gcc/config/aarch64/aarch64-protos.h | 7 +- + gcc/config/aarch64/aarch64.cc | 158 ++++++++++++---------------- + gcc/config/aarch64/aarch64.md | 17 ++- + gcc/config/aarch64/constraints.md | 5 + + 4 files changed, 85 insertions(+), 102 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 97984f3ab..3ff1a0163 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void); + poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); + int aarch64_get_condition_code (rtx); + bool aarch64_address_valid_for_prefetch_p (rtx, bool); +-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); ++bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode); + unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in); + unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); + bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); +@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); +-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); ++bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); + machine_mode aarch64_sve_int_mode (machine_mode); + opt_machine_mode aarch64_sve_pred_mode (unsigned int); + machine_mode aarch64_sve_pred_mode (machine_mode); +@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool); + bool aarch64_sve_float_mul_immediate_p (rtx); + bool aarch64_split_dimode_const_store (rtx, rtx); + bool aarch64_symbolic_address_p (rtx); +-bool aarch64_uimm12_shift (HOST_WIDE_INT); ++bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT); + int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); ++bool 
aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); + const char *aarch64_output_casesi (rtx *); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index cf7736994..acb659f53 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) + + /* Return true if VAL is a valid bitmask immediate for MODE. */ + bool +-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) + { + if (mode == DImode) +- return aarch64_bitmask_imm (val_in); +- +- unsigned HOST_WIDE_INT val = val_in; ++ return aarch64_bitmask_imm (val); + + if (mode == SImode) + return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); +@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val, + } + + +-/* Return true if val is an immediate that can be loaded into a +- register by a MOVZ instruction. */ +-static bool +-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++/* Return true if VAL is a valid MOVZ immediate. */ ++static inline bool ++aarch64_is_movz (unsigned HOST_WIDE_INT val) + { +- if (GET_MODE_SIZE (mode) > 4) +- { +- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) +- return 1; +- } +- else +- { +- /* Ignore sign extension. */ +- val &= (HOST_WIDE_INT) 0xffffffff; +- } +- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++ return (val >> (ctz_hwi (val) & 48)) < 65536; + } + + +-/* Return true if VAL is an immediate that can be loaded into a +- register in a single instruction. */ ++/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. */ + bool +-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val) + { +- scalar_int_mode int_mode; +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- return false; ++ return aarch64_is_movz (val) || aarch64_is_movz (~val) ++ || aarch64_bitmask_imm (val); ++} + +- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) +- return 1; +- return aarch64_bitmask_imm (val, int_mode); ++ ++/* Return true if VAL is an immediate that can be created by a single ++ MOV instruction. */ ++bool ++aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode) ++{ ++ gcc_assert (mode == SImode || mode == DImode); ++ ++ if (val < 65536) ++ return true; ++ ++ unsigned HOST_WIDE_INT mask = ++ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U; ++ ++ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask)) ++ return true; ++ ++ val = (val & mask) | ((val << 32) & ~mask); ++ return aarch64_bitmask_imm (val); + } + + + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, +- scalar_int_mode mode) ++ machine_mode mode) + { + int i; + unsigned HOST_WIDE_INT val, val2, mask; + int one_match, zero_match; + int num_insns; + ++ gcc_assert (mode == SImode || mode == DImode); ++ + val = INTVAL (imm); + + if (aarch64_move_imm (val, mode)) +@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + return 1; + } + +- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff +- (with XXXX non-zero). In that case check to see if the move can be done in +- a smaller mode. 
*/ +- val2 = val & 0xffffffff; +- if (mode == DImode +- && aarch64_move_imm (val2, SImode) +- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) +- { +- if (generate) +- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); +- +- /* Check if we have to emit a second instruction by checking to see +- if any of the upper 32 bits of the original DI mode value is set. */ +- if (val == val2) +- return 1; +- +- i = (val >> 48) ? 48 : 32; +- +- if (generate) +- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
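The refactored MOVZ test in the hunk above is compact enough to restate stand-alone. In this sketch GCC's ctz_hwi (which is defined for zero) is replaced by a plain builtin, hence the explicit guard; the helper name is mine, not GCC's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* VAL is a MOVZ immediate iff it is a 16-bit value shifted left by
   0, 16, 32 or 48; "& 48" rounds the trailing-zero count down to a
   multiple of 16.  */
static bool
is_movz (uint64_t val)
{
  unsigned int shift = val ? __builtin_ctzll (val) & 48 : 0;
  return (val >> shift) < 65536;
}

int
main (void)
{
  printf ("%d\n", is_movz (0xabcd000000000000ull));  /* 1: movz, lsl #48 */
  printf ("%d\n", is_movz (0x00000000abc00000ull));  /* 1: 0xabc0 << 16 */
  printf ("%d\n", is_movz (0x00000000abcd0001ull));  /* 0: spans chunks */
  return 0;
}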
View file
_service:tar_scm:0160-LoongArch-Fix-missing-plugin-header.patch
Added
@@ -0,0 +1,32 @@ +From bec97638d68c760f6ee4b0a86ce4f9ffe9a691b3 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 2 Apr 2024 09:20:32 +0800 +Subject: [PATCH 160/188] LoongArch: Fix missing plugin header + +gcc/ChangeLog: + + * config/loongarch/t-loongarch: Add loongarch-def-array.h + to OPTIONS_H_EXTRA. +--- + gcc/config/loongarch/t-loongarch | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 994f4d19c..488e8cff3 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h \ ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-def-array.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc +-- +2.43.0 +
View file
_service:tar_scm:0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
Added
@@ -0,0 +1,229 @@ +From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:55:28 +0100 +Subject: PATCH 062/157 BackportSME AArch64: convert some patterns to + compact MD syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113 + +Hi All, + +This converts some patterns in the AArch64 backend to use the new +compact syntax. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (arches): Add nosimd. + (*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to + compact syntax. +--- + gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------ + 1 file changed, 78 insertions(+), 83 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index ea94152bf..5d02da42f 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,7 +378,7 @@ + ;; As a convenience, "fp_q" means "fp" + the ability to move between + ;; Q registers and is equivalent to "simd". + +-(define_enum "arches" any rcpc8_4 fp fp_q simd sve fp16) ++(define_enum "arches" any rcpc8_4 fp fp_q simd nosimd sve fp16) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -409,6 +409,9 @@ + (and (eq_attr "arch" "fp_q, simd") + (match_test "TARGET_SIMD")) + ++ (and (eq_attr "arch" "nosimd") ++ (match_test "!TARGET_SIMD")) ++ + (and (eq_attr "arch" "fp16") + (match_test "TARGET_FP_F16INST")) + +@@ -1194,26 +1197,27 @@ + ) + + (define_insn "*mov<mode>_aarch64" +- (set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") +- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) ++ (set (match_operand:SHORT 0 "nonimmediate_operand") ++ (match_operand:SHORT 1 "aarch64_mov_operand")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" +- "@ +- mov\t%w0, %w1 +- mov\t%w0, %1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr<size>\t%w0, %1 +- ldr\t%<size>0, %1 +- str<size>\t%w1, %0 +- str\t%<size>1, %0 +- * return TARGET_SIMD ? \"umov\t%w0, %1.<v>0\" : \"fmov\t%w0, %s1\"; +- * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\"; +- * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>0\" : \"fmov\t%s0, %s1\";" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, +- store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") +- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*") ++ {@ cons: =0, 1; attrs: type, arch ++ r, r ; mov_reg , * mov\t%w0, %w1 ++ r, M ; mov_imm , * mov\t%w0, %1 ++ w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); ++ /* The "mov_imm" type for CNT is just a placeholder. 
*/ ++ r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, m ; load_4 , * ldr<size>\t%w0, %1 ++ w, m ; load_4 , * ldr\t%<size>0, %1 ++ m, r Z ; store_4 , * str<size>\\t%w1, %0 ++ m, w ; store_4 , * str\t%<size>1, %0 ++ r, w ; neon_to_gp<q> , simd umov\t%w0, %1.<v>0 ++ r, w ; neon_to_gp<q> , nosimd fmov\t%w0, %s1 /*foo */ ++ w, r Z ; neon_from_gp<q>, simd dup\t%0.<Vallxd>, %w1 ++ w, r Z ; neon_from_gp<q>, nosimd fmov\t%s0, %w1 ++ w, w ; neon_dup , simd dup\t%<Vetype>0, %1.<v>0 ++ w, w ; neon_dup , nosimd fmov\t%s0, %s1 ++ } + ) + + (define_expand "mov<mode>" +@@ -1250,79 +1254,70 @@ + ) + + (define_insn_and_split "*movsi_aarch64" +- (set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") +- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds")) ++ (set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" +- "@ +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %1 +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%w0, %1 +- ldr\\t%s0, %1 +- str\\t%w1, %0 +- str\\t%s1, %0 +- adrp\\t%x0, %A1\;ldr\\t%w0, %x0, %L1 +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%s0, %w1 +- fmov\\t%w0, %s1 +- fmov\\t%s0, %s1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, SImode);" ++ {@ cons: =0, 1; attrs: type, arch, length ++ r k, r ; mov_reg , * , 4 mov\t%w0, %w1 ++ r , k ; mov_reg , * , 4 ^ ++ r , M ; mov_imm , * , 4 mov\t%w0, %1 ++ r , n ; mov_imm , * ,16 # ++ /* The "mov_imm" type for CNT is just a placeholder. */ ++ r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r , m ; load_4 , * , 4 ldr\t%w0, %1 ++ w , m ; load_4 , fp , 4 ldr\t%s0, %1 ++ m , r Z; store_4 , * , 4 str\t%w1, %0 ++ m , w ; store_4 , fp , 4 str\t%s1, %0 ++ r , Usw; load_4 , * , 8 adrp\t%x0, %A1;ldr\t%w0, %x0, %L1 ++ r , Usa; adr , * , 4 adr\t%x0, %c1 ++ r , Ush; adr , * , 4 adrp\t%x0, %A1 ++ w , r Z; f_mcr , fp , 4 fmov\t%s0, %w1 ++ r , w ; f_mrc , fp , 4 fmov\t%w0, %s1 ++ w , w ; fmov , fp , 4 fmov\t%s0, %s1 ++ w , Ds ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, SImode); ++ } + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, +- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") +- ++ (const_int 0) ++ { ++ aarch64_expand_mov_immediate (operands0, operands1); ++ DONE; ++ } + ) + + (define_insn_and_split "*movdi_aarch64" +- (set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") +- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd")) ++ (set (match_operand:DI 0 "nonimmediate_operand") ++ (match_operand:DI 1 "aarch64_mov_operand")) + "(register_operand (operands0, DImode) + || aarch64_reg_or_zero (operands1, DImode))" +- "@ +- mov\\t%x0, %x1 +- mov\\t%0, %x1 +- mov\\t%x0, %1 +- * return aarch64_is_mov_xn_imm (INTVAL (operands1)) ? 
\"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%x0, %1 +- ldr\\t%d0, %1 +- str\\t%x1, %0 +- str\\t%d1, %0 +- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, %0, %L1\" : \"adrp\\t%0, %A1\;ldr\\t%0, %0, %L1\"; +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%d0, %x1 +- fmov\\t%x0, %d1 +- fmov\\t%d0, %d1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, DImode);" +- "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), DImode) +- && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNTD is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, +- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, +- fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") ++ {@ cons: =0, 1; attrs: type, arch, length ++ r, r ; mov_reg , * , 4 mov\t%x0, %x1 ++ k, r ; mov_reg , * , 4 mov\t%0, %x1
View file
_service:tar_scm:0161-LoongArch-Remove-unused-code.patch
Added
@@ -0,0 +1,344 @@ +From 47581dd6da960172bc768435400010748b3f97eb Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 3 Apr 2024 09:38:23 +0800 +Subject: PATCH 161/188 LoongArch: Remove unused code. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove unused code. + * config/loongarch/loongarch-protos.h + (loongarch_split_lsx_copy_d): Remove. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/loongarch.cc + (loongarch_split_lsx_copy_d): Ditto. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/lsx.md (lsx_vpickve2gr_du): Remove splitter. + (lsx_vpickve2gr_<lsxfmt_f>): Ditto. + (abs<mode>2): Remove expander. + (vabs<mode>2): Rename 2 abs<mode>2. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-abs.c: New test. +--- + gcc/config/loongarch/lasx.md | 12 +-- + gcc/config/loongarch/loongarch-protos.h | 3 - + gcc/config/loongarch/loongarch.cc | 76 ---------------- + gcc/config/loongarch/lsx.md | 89 ++----------------- + .../gcc.target/loongarch/vector/lsx/lsx-abs.c | 26 ++++++ + 5 files changed, 35 insertions(+), 171 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 45a0a8cc8..44a7d58ff 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -572,12 +572,7 @@ + (match_operand 3 "const_<bitmask256>_operand" ""))) + "ISA_HAS_LASX" + { +-#if 0 +- if (!TARGET_64BIT && (<MODE>mode == V4DImode || <MODE>mode == V4DFmode)) +- return "#"; +- else +-#endif +- return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; ++ return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) +@@ -1446,10 +1441,7 @@ + if (which_alternative == 1) + return "xvldi.b\t%u0,0" ; + +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; ++ return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; + } + (set_attr "type" "simd_fill") + (set_attr "mode" "<MODE>") +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 8523da8d6..0c31a74b7 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -89,9 +89,6 @@ extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); + extern bool loongarch_split_256bit_move_p (rtx, rtx); +-extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); +-extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); +-extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e27335b3c..8d8a50b70 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4772,82 +4772,6 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + } + +- +-/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function +- used to generate subregs. 
*/ +- +-void +-loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index, +- rtx (*gen_fn)(rtx, rtx, rtx)) +-{ +- gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode) +- || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (dest, false); +- rtx high = loongarch_subword (dest, true); +- rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- +- emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2))); +- emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); +-} +- +-/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2. */ +- +-void +-loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2) +-{ +- int i; +- gcc_assert (GET_MODE (dest) == GET_MODE (src1)); +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src2) == DImode || src2 == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (src2, false); +- rtx high = loongarch_subword (src2, true); +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0); +- i = exact_log2 (INTVAL (index)); +- gcc_assert (i != -1); +- +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1, +- GEN_INT (1 << (i * 2)))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, +- GEN_INT (1 << (i * 2 + 1)))); +-} +- +-/* Split FILL.D. */ +- +-void +-loongarch_split_lsx_fill_d (rtx dest, rtx src) +-{ +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src) == DImode || src == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low, high; +- if (src == const0_rtx) +- { +- low = src; +- high = src; +- } +- else +- { +- low = loongarch_subword (src, false); +- high = loongarch_subword (src, true); +- } +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low)); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); +-} +- +- + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. 
*/ + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index dc81093e9..2eac11473 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -582,28 +582,11 @@ + (match_operand 3 "const_<bitmask>_operand" ""))) + "ISA_HAS_LSX" + { +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; ++ return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_split +- (set (match_operand:LSX_D 0 "register_operand") +- (vec_merge:LSX_D +- (vec_duplicate:LSX_D +- (match_operand:<UNITMODE> 1 "<LSX_D:lsx_d>_operand")) +- (match_operand:LSX_D 2 "register_operand") +- (match_operand 3 "const_<bitmask>_operand"))) +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_insert_d (operands0, operands2, operands3, operands1); +- DONE; +-}) +- + (define_insn "lsx_vextrins_<lsxfmt_f>_internal" + (set (match_operand:LSX 0 "register_operand" "=f") + (vec_merge:LSX +@@ -653,70 +636,26 @@
View file
_service:tar_scm:0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
Added
@@ -0,0 +1,792 @@ +From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:18 +0000 +Subject: PATCH 063/157 BackportSME aarch64: Use SVE's RDVL instruction + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0 + +We didn't previously use SVE's RDVL instruction, since the CNT* +forms are preferred and provide most of the range. However, +there are some cases that RDVL can handle and CNT* can't, +and using RDVL-like instructions becomes important for SME. + +gcc/ + * config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p) + (aarch64_output_sve_rdvl): Declare. + * config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New + function, split out from... + (aarch64_sve_cnt_immediate_p): ...here. + (aarch64_sve_rdvl_factor_p): New function. + (aarch64_sve_rdvl_immediate_p): Likewise. + (aarch64_output_sve_rdvl): Likewise. + (aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL + for some cases. + (aarch64_expand_mov_immediate): Handle RDVL immediates. + (aarch64_mov_operand_p): Likewise. + * config/aarch64/constraints.md (Usr): New constraint. + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL + alternative. + (*movsi_aarch64, *movdi_aarch64): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output. + * gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise. + * gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used + to calculate the -17 and 17 factors. + * gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor. 
+--- + gcc/config/aarch64/aarch64-protos.h | 2 + + gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------ + gcc/config/aarch64/aarch64.md | 3 + + gcc/config/aarch64/constraints.md | 6 + + .../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++-- + .../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +- + .../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +- + .../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +- + .../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +- + .../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +- + .../gcc.target/aarch64/sve/loop_add_4.c | 6 +- + .../aarch64/sve/pcs/stack_clash_1.c | 3 +- + 14 files changed, 225 insertions(+), 123 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 3ff1a0163..14a568140 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode); + HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); + bool aarch64_sve_cnt_immediate_p (rtx); + bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); ++bool aarch64_sve_rdvl_immediate_p (rtx); + bool aarch64_sve_addvl_addpl_immediate_p (rtx); + bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); +@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *); + char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); + char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); + char *aarch64_output_sve_scalar_inc_dec (rtx); ++char *aarch64_output_sve_rdvl (rtx); + char *aarch64_output_sve_addvl_addpl (rtx); + char *aarch64_output_sve_vector_inc_dec (const char *, rtx); + char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index acb659f53..4194dfc70 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) + return -1; + } + ++/* Return true if a single CNTBHWD instruction can multiply FACTOR ++ by the number of 128-bit quadwords in an SVE vector. */ ++ ++static bool ++aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor) ++{ ++ /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ ++ return (IN_RANGE (factor, 2, 16 * 16) ++ && (factor & 1) == 0 ++ && factor <= 16 * (factor & -factor)); ++} ++ + /* Return true if we can move VALUE into a register using a single + CNTBHWD instruction. */ + +@@ -5527,11 +5539,7 @@ static bool + aarch64_sve_cnt_immediate_p (poly_int64 value) + { + HOST_WIDE_INT factor = value.coeffs0; +- /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ +- return (value.coeffs1 == factor +- && IN_RANGE (factor, 2, 16 * 16) +- && (factor & 1) == 0 +- && factor <= 16 * (factor & -factor)); ++ return value.coeffs1 == factor && aarch64_sve_cnt_factor_p (factor); + } + + /* Likewise for rtx X. */ +@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) + -offset_value.coeffs1, 0); + } + ++/* Return true if a single RDVL instruction can multiply FACTOR by the ++ number of 128-bit quadwords in an SVE vector. 
*/
++
++static bool
++aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
++{
++  return (multiple_p (factor, 16)
++	  && IN_RANGE (factor, -32 * 16, 31 * 16));
++}
++
++/* Return true if we can move VALUE into a register using a single
++   RDVL instruction.  */
++
++static bool
++aarch64_sve_rdvl_immediate_p (poly_int64 value)
++{
++  HOST_WIDE_INT factor = value.coeffs[0];
++  return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
++}
++
++/* Likewise for rtx X.  */
++
++bool
++aarch64_sve_rdvl_immediate_p (rtx x)
++{
++  poly_int64 value;
++  return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value);
++}
++
++/* Return the asm string for moving RDVL immediate OFFSET into register
++   operand 0.  */
++
++char *
++aarch64_output_sve_rdvl (rtx offset)
++{
++  static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)];
++  poly_int64 offset_value = rtx_to_poly_int64 (offset);
++  gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value));
++
++  int factor = offset_value.coeffs[1];
++  snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16);
++  return buffer;
++}
++
+ /* Return true if we can add VALUE to a register using a single ADDVL
+    or ADDPL instruction.  */
+
+@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset)
+     count += 1;
+   else if (factor != 0)
+     {
+-      factor = abs (factor);
+-      if (factor > 16 * (factor & -factor))
+-	/* Need one register for the CNT result and one for the multiplication
+-	   factor.  If necessary, the second temporary can be reused for the
+-	   constant part of the offset.  */
++      factor /= (HOST_WIDE_INT) least_bit_hwi (factor);
++      if (!IN_RANGE (factor, -32, 31))
++	/* Need one register for the CNT or RDVL result and one for the
++	   multiplication factor.  If necessary, the second temporary
++	   can be reused for the constant part of the offset.  */
+	return 2;
+-      /* Need one register for the CNT result (which might then
++      /* Need one register for the CNT or RDVL result (which might then
+	 be shifted).  */
+      count += 1;
+    }
+@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   /* Otherwise use a CNT-based sequence.  */
+   else if (factor != 0)
+     {
+-      /* Use a subtraction if we have a negative factor.  */
+-      rtx_code code = PLUS;
+-      if (factor < 0)
+-	{
+-	  factor = -factor;
+-	  code = MINUS;
+-	}
++      /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT,
++	 with negative shifts indicating a shift right.  */
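RDVL scales the vector length in bytes by a signed 6-bit immediate, which is why the predicate above accepts only factors that are multiples of 16 within [-32 * 16, 31 * 16]. The same test restated outside GCC's poly_int machinery (illustrative helper, not GCC API):

  /* FACTOR counts bytes per 128-bit quadword of vector length; RDVL can
     materialise it when FACTOR/16 fits the instruction's signed 6-bit
     multiplier.  */
  static bool
  rdvl_factor_ok (long factor)
  {
    return factor % 16 == 0 && factor / 16 >= -32 && factor / 16 <= 31;
  }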
View file
_service:tar_scm:0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch
Added
@@ -0,0 +1,135 @@ +From 7dff9d3f7fefe074e78cd7ff6529d7c1ea6cc3b1 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 2 Apr 2024 14:29:08 +0800 +Subject: PATCH 162/188 LoongArch: Set default alignment for functions jumps + and loops PR112919. + +Xi Ruoyao set the alignment rules under LA464 in commit r14-1839, +but the macro ASM_OUTPUT_ALIGN_WITH_NOP was removed in R14-4674, +which affected the alignment rules. + +So I set different aligns on LA464 and LA664 again to test the +performance of spec2006, and modify the alignment based on the test +results. + +gcc/ChangeLog: + + PR target/112919 + * config/loongarch/loongarch-def.cc (la664_align): Newly defined + function that sets alignment rules under the LA664 microarchitecture. + * config/loongarch/loongarch-opts.cc + (loongarch_target_option_override): If not optimizing for size, set + the default alignment to what the target wants. + * config/loongarch/loongarch-tune.h (struct loongarch_align): Add + new member variables jump and loop. +--- + gcc/config/loongarch/loongarch-def.cc | 11 ++++++++--- + gcc/config/loongarch/loongarch-opts.cc | 19 +++++++++++++------ + gcc/config/loongarch/loongarch-tune.h | 22 +++++++++++++++------- + 3 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 533dd0af2..a48050c5f 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -81,14 +81,19 @@ array_tune<loongarch_cache> loongarch_cpu_cache = + + static inline loongarch_align la464_align () + { +- return loongarch_align ().function_ ("32").label_ ("16"); ++ return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16"); ++} ++ ++static inline loongarch_align la664_align () ++{ ++ return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32"); + } + + array_tune<loongarch_align> loongarch_cpu_align = + array_tune<loongarch_align> () +- .set (CPU_LOONGARCH64, la464_align ()) ++ .set (CPU_LOONGARCH64, la664_align ()) + .set (CPU_LA464, la464_align ()) +- .set (CPU_LA664, la464_align ()); ++ .set (CPU_LA664, la664_align ()); + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 062d430c2..c455c5e32 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -922,13 +922,20 @@ loongarch_target_option_override (struct loongarch_target *target, + { + loongarch_update_gcc_opt_status (target, opts, opts_set); + +- /* alignments */ +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions +- = loongarch_cpu_aligntarget->cpu_tune.function; ++ /* If not optimizing for size, set the default ++ alignment to what the target wants. 
*/
++  if (!opts->x_optimize_size)
++    {
++      if (opts->x_flag_align_functions && !opts->x_str_align_functions)
++	opts->x_str_align_functions
++	  = loongarch_cpu_align[target->cpu_tune].function;
++
++      if (opts->x_flag_align_loops && !opts->x_str_align_loops)
++	opts->x_str_align_loops = loongarch_cpu_align[target->cpu_tune].loop;
+
+-  if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+-    opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
++      if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
++	opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump;
++    }
+
+   /* Set up parameters to be used in prefetching algorithm.  */
+   int simultaneous_prefetches
+diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
+index 26f163f0a..d286eee0b 100644
+--- a/gcc/config/loongarch/loongarch-tune.h
++++ b/gcc/config/loongarch/loongarch-tune.h
+@@ -162,14 +162,16 @@ struct loongarch_cache {
+   }
+ };
+
+-/* Alignment for functions and labels for best performance.  For new uarchs
+-   the value should be measured via benchmarking.  See the documentation for
+-   -falign-functions and -falign-labels in invoke.texi for the format.  */
++/* Alignment for functions loops and jumps for best performance.  For new
++   uarchs the value should be measured via benchmarking.  See the
++   documentation for -falign-functions, -falign-loops, and -falign-jumps in
++   invoke.texi for the format.  */
+ struct loongarch_align {
+   const char *function;  /* default value for -falign-functions */
+-  const char *label;     /* default value for -falign-labels */
++  const char *loop;      /* default value for -falign-loops */
++  const char *jump;      /* default value for -falign-jumps */
+
+-  loongarch_align () : function (nullptr), label (nullptr) {}
++  loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {}
+
+   loongarch_align function_ (const char *_function)
+   {
+@@ -177,9 +179,15 @@ struct loongarch_align {
+     return *this;
+   }
+
+-  loongarch_align label_ (const char *_label)
++  loongarch_align loop_ (const char *_loop)
+   {
+-    label = _label;
++    loop = _loop;
++    return *this;
++  }
++
++  loongarch_align jump_ (const char *_jump)
++  {
++    jump = _jump;
+     return *this;
+   }
+ };
+--
+2.43.0
+
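The la464_align/la664_align helpers above use a chained-setter idiom: each setter assigns one field and returns the object, so a per-uarch default reads as a single expression. A minimal reproduction of the idiom (illustrative, simplified from loongarch_align):

  struct align_opts
  {
    const char *function = nullptr, *loop = nullptr, *jump = nullptr;
    align_opts function_ (const char *v) { function = v; return *this; }
    align_opts loop_ (const char *v) { loop = v; return *this; }
    align_opts jump_ (const char *v) { jump = v; return *this; }
  };

  /* Reads like a keyword-argument call site:  */
  align_opts la464_like = align_opts ().function_ ("32").loop_ ("16").jump_ ("16");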
View file
_service:tar_scm:0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
Added
@@ -0,0 +1,137 @@ +From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:19 +0000 +Subject: PATCH 064/157 BackportSME aarch64: Make AARCH64_FL_SVE + requirements explicit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148 + +So far, all intrinsics covered by the aarch64-sve-builtins* +framework have (naturally enough) required at least SVE. +However, arm_sme.h defines a couple of intrinsics that can +be called by any code. It's therefore necessary to make +the implicit SVE requirement explicit. + +gcc/ + * config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove + implied requirement on SVE. + * config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE. + * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. +--- + .../aarch64/aarch64-sve-builtins-base.def | 10 +++++----- + .../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++----- + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- + 3 files changed, 19 insertions(+), 11 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index ffdf7cb4c..3a58f76c3 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -17,7 +17,7 @@ + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +-#define REQUIRED_EXTENSIONS 0 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE + DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) + DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) +@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none) + DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16 + DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) + DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) + DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) +@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) + DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM + DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) + DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) + DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) +@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) + DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM + DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM + DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) + DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) + DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +index 635089ffc..d5f23a887 100644 +--- 
a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +@@ -17,7 +17,7 @@ + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2 ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2 + DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none) + DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none) +@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none) + DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_AES) + DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) +@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none) + DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_BITPERM) + DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_SHA3) + DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4) ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_SM4) + DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) + DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) + #undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 7924cdf0f..dde01f676 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -525,7 +525,7 @@ static const predication_index preds_z = { PRED_z, NUM_PREDS }; + static CONSTEXPR const function_group_info function_groups = { + #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ +- REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, ++ REQUIRED_EXTENSIONS }, + #include "aarch64-sve-builtins.def" + }; + +-- +2.33.0 +
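With the implicit SVE bit no longer OR-ed in by the DEF_SVE_FUNCTION expansion, each REQUIRED_EXTENSIONS value must carry the complete mask, and availability remains a plain subset test. A sketch of that test (helper name illustrative; the real flags are the AARCH64_FL_* bits above):

  #include <cstdint>

  /* An intrinsic is available only if every bit of its REQUIRED_EXTENSIONS
     mask is enabled; dropping the implicit SVE bit from the table without
     spelling it out per entry would wrongly expose SVE intrinsics to
     non-SVE code.  */
  static bool
  intrinsic_available (uint64_t enabled, uint64_t required)
  {
    return (enabled & required) == required;
  }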
View file
_service:tar_scm:0163-LoongArch-Enable-switchable-target.patch
Added
@@ -0,0 +1,281 @@
+From 427d5f10951435241d883a13557f862683046ddd Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Mon, 8 Apr 2024 16:45:13 +0800
+Subject: [PATCH 163/188] LoongArch: Enable switchable target
+
+This patch fixes the back-end context switching in cases where functions
+should be built with their own target contexts instead of the
+global one, such as LTO linking and functions with target attributes (TBD).
+
+	PR target/113233
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_reg_init):
+	Reinitialize the loongarch_regno_mode_ok cache.
+	(loongarch_option_override): Same.
+	(loongarch_save_restore_target_globals): Restore target globals.
+	(loongarch_set_current_function): Restore the target contexts
+	for functions.
+	(TARGET_SET_CURRENT_FUNCTION): Define.
+	* config/loongarch/loongarch.h (SWITCHABLE_TARGET): Enable
+	switchable target context.
+	* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
+	Initialize all builtin functions at startup.
+	(loongarch_expand_builtin): Turn assertion of builtin availability
+	into a test.
+
+gcc/testsuite/ChangeLog:
+
+	* lib/target-supports.exp: Define condition loongarch_sx_as.
+	* gcc.dg/lto/pr113233_0.c: New test.
+---
+ gcc/config/loongarch/loongarch-builtins.cc | 25 +++---
+ gcc/config/loongarch/loongarch.cc          | 91 ++++++++++++++++++++--
+ gcc/config/loongarch/loongarch.h           |  2 +
+ gcc/testsuite/gcc.dg/lto/pr113233_0.c      | 14 ++++
+ gcc/testsuite/lib/target-supports.exp      | 12 +++
+ 5 files changed, 127 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/pr113233_0.c
+
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index e3b4dbc52..51abba007 100644
+--- a/gcc/config/loongarch/loongarch-builtins.cc
++++ b/gcc/config/loongarch/loongarch-builtins.cc
+@@ -2507,14 +2507,11 @@ loongarch_init_builtins (void)
+   for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
+     {
+       d = &loongarch_builtins[i];
+-      if (d->avail ())
+-	{
+-	  type = loongarch_build_function_type (d->function_type);
+-	  loongarch_builtin_decls[i]
+-	    = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
+-				    NULL);
+-	  loongarch_get_builtin_decl_index[d->icode] = i;
+-	}
++      type = loongarch_build_function_type (d->function_type);
++      loongarch_builtin_decls[i]
++	= add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
++				NULL);
++      loongarch_get_builtin_decl_index[d->icode] = i;
+     }
+ }
+
+@@ -3100,15 +3097,21 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+			  int ignore ATTRIBUTE_UNUSED)
+ {
+   tree fndecl;
+-  unsigned int fcode, avail;
++  unsigned int fcode;
+   const struct loongarch_builtin_description *d;
+
+   fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+   fcode = DECL_MD_FUNCTION_CODE (fndecl);
+   gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins));
+   d = &loongarch_builtins[fcode];
+-  avail = d->avail ();
+-  gcc_assert (avail != 0);
++
++  if (!d->avail ())
++    {
++      error_at (EXPR_LOCATION (exp),
++		"built-in function %qD is not enabled", fndecl);
++      return target;
++    }
++
+   switch (d->builtin_type)
+     {
+     case LARCH_BUILTIN_DIRECT:
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 8d8a50b70..50ab6a82a 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -7567,15 +7567,19 @@ loongarch_global_init (void)
+     loongarch_dwarf_regno[i] = INVALID_REGNUM;
+   }
+
++  /* Function to allocate machine-dependent function status.
*/ ++ init_machine_status = &loongarch_init_machine_status; ++}; ++ ++static void ++loongarch_reg_init (void) ++{ + /* Set up loongarch_hard_regno_mode_ok. */ + for (int mode = 0; mode < MAX_MACHINE_MODE; mode++) + for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + loongarch_hard_regno_mode_ok_pmoderegno + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); +- +- /* Function to allocate machine-dependent function status. */ +- init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_option_override_internal (struct loongarch_target *target, +@@ -7602,20 +7606,92 @@ loongarch_option_override_internal (struct loongarch_target *target, + + /* Override some options according to the resolved target. */ + loongarch_target_option_override (target, opts, opts_set); ++ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (opts, opts_set); ++ ++ loongarch_reg_init (); ++} ++ ++/* Remember the last target of loongarch_set_current_function. */ ++ ++static GTY(()) tree loongarch_previous_fndecl; ++ ++/* Restore or save the TREE_TARGET_GLOBALS from or to new_tree. ++ Used by loongarch_set_current_function to ++ make sure optab availability predicates are recomputed when necessary. */ ++ ++static void ++loongarch_save_restore_target_globals (tree new_tree) ++{ ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); ++} ++ ++/* Implement TARGET_SET_CURRENT_FUNCTION. */ ++ ++static void ++loongarch_set_current_function (tree fndecl) ++{ ++ if (fndecl == loongarch_previous_fndecl) ++ return; ++ ++ tree old_tree; ++ if (loongarch_previous_fndecl == NULL_TREE) ++ old_tree = target_option_current_node; ++ else if (DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl)) ++ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl); ++ else ++ old_tree = target_option_default_node; ++ ++ if (fndecl == NULL_TREE) ++ { ++ if (old_tree != target_option_current_node) ++ { ++ loongarch_previous_fndecl = NULL_TREE; ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION ++ (target_option_current_node)); ++ } ++ return; ++ } ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ if (new_tree == NULL_TREE) ++ new_tree = target_option_default_node; ++ ++ loongarch_previous_fndecl = fndecl; ++ ++ if (new_tree == old_tree) ++ return; ++ ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (new_tree)); ++ ++ loongarch_reg_init (); ++ ++ loongarch_save_restore_target_globals (new_tree); + } + ++ ++ + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + loongarch_option_override (void) + {
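loongarch_set_current_function above is essentially a memoised context switch: it remembers the last fndecl and pays for cl_target_option_restore plus the register-mode cache rebuild only when the incoming function's target node differs. The pattern in isolation (generic sketch, not the GCC API):

  /* Cache the active context; switching is expensive (standing in here
     for cl_target_option_restore + loongarch_reg_init), so skip it when
     consecutive functions share a target.  */
  template <typename Ctx>
  struct context_switcher
  {
    const Ctx *active = nullptr;

    void set (const Ctx *next, void (*apply) (const Ctx *))
    {
      if (next == active)
        return;   /* fast path: same context as last time */
      apply (next);
      active = next;
    }
  };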
View file
_service:tar_scm:0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
Added
@@ -0,0 +1,562 @@ +From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:19 +0000 +Subject: PATCH 065/157 BackportSME aarch64: Add group suffixes to SVE + intrinsics + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d + +The SME2 ACLE adds a new "group" suffix component to the naming +convention for SVE intrinsics. This is also used in the new tuple +forms of the svreinterpret intrinsics. + +This patch adds support for group suffixes and defines the +x2, x3 and x4 suffixes that are needed for the svreinterprets. + +gcc/ + * config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take + a group suffix index parameter. + (build_32_64, build_all): Update accordingly. Iterate over all + group suffixes. + * config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold) + (svqshl_impl::fold, svrshl_impl::fold): Update function_instance + constructors. + * config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array. + (groups_none): New constant. + (function_groups): Initialize the groups field. + (function_instance::hash): Hash the group index. + (function_builder::get_name): Add the group suffix. + (function_builder::add_overloaded_functions): Iterate over all + group suffixes. + (function_resolver::lookup_form): Take a group suffix parameter. + (function_resolver::resolve_to): Likewise. + * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New + macro. + (x2, x3, x4): New group suffixes. + * config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum. + (group_suffix_info): New structure. + (function_group_info::groups): New member variable. + (function_instance::group_suffix_id): Likewise. + (group_suffixes): New array. + (function_instance::operator==): Compare the group suffixes. + (function_instance::group_suffix): New function. +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------ + .../aarch64/aarch64-sve-builtins-sve2.cc | 10 +-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------ + gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++ + gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++---- + 5 files changed, 165 insertions(+), 72 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 4fa4181b9..3ecef026c 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format, + } + + /* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, +- the type suffixes at index TI and the predication suffix at index PI. +- The other arguments are as for build_all. */ ++ the type suffixes at index TI, the group suffixes at index GI, and the ++ predication suffix at index PI. The other arguments are as for ++ build_all. */ + static void + build_one (function_builder &b, const char *signature, + const function_group_info &group, mode_suffix_index mode_suffix_id, +- unsigned int ti, unsigned int pi, bool force_direct_overloads) ++ unsigned int ti, unsigned int gi, unsigned int pi, ++ bool force_direct_overloads) + { + /* Byte forms of svdupq take 16 arguments. 
*/
+  auto_vec<tree, 16> argument_types;
+  function_instance instance (group.base_name, *group.base, *group.shape,
+			      mode_suffix_id, group.types[ti],
+-			      group.preds[pi]);
++			      group.groups[gi], group.preds[pi]);
+  tree return_type = parse_signature (instance, signature, argument_types);
+  apply_predication (instance, return_type, argument_types);
+  b.add_unique_function (instance, return_type, argument_types,
+@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature,
+	     mode_suffix_index mode64, bool force_direct_overloads = false)
+ {
+   for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+-    if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+-      {
+-	gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
+-	build_one (b, signature, group, mode32, 0, pi,
+-		   force_direct_overloads);
+-	build_one (b, signature, group, mode64, 0, pi,
+-		   force_direct_overloads);
+-      }
+-    else
+-      for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++    for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++      if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+	{
+-	  unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
+-	  gcc_assert (bits == 32 || bits == 64);
+-	  mode_suffix_index mode = bits == 32 ? mode32 : mode64;
+-	  if (mode != MODE_none)
+-	    build_one (b, signature, group, mode, ti, pi,
+-		       force_direct_overloads);
++	  gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
++	  build_one (b, signature, group, mode32, 0, gi, pi,
++		     force_direct_overloads);
++	  build_one (b, signature, group, mode64, 0, gi, pi,
++		     force_direct_overloads);
+	}
++      else
++	for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
++	     ++ti)
++	  {
++	    unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
++	    gcc_assert (bits == 32 || bits == 64);
++	    mode_suffix_index mode = bits == 32 ? mode32 : mode64;
++	    if (mode != MODE_none)
++	      build_one (b, signature, group, mode, ti, gi, pi,
++			 force_direct_overloads);
++	  }
+ }
+
+ /* For every type and predicate combination in GROUP, add one function
+@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature,
+	   bool force_direct_overloads = false)
+ {
+   for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+-    for (unsigned int ti = 0;
+-	 ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+-      build_one (b, signature, group, mode_suffix_id, ti, pi,
+-		 force_direct_overloads);
++    for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++      for (unsigned int ti = 0;
++	   ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++	build_one (b, signature, group, mode_suffix_id, ti, gi, pi,
++		   force_direct_overloads);
+ }
+
+ /* TYPE is the largest type suffix associated with the arguments of R,
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+index e066f096d..a94e5e269 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+@@ -252,7 +252,7 @@ public:
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svqshl", functions::svqshl,
+				    shapes::binary_int_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	return f.redirect_call (instance);
+      }
+    else
+@@ -261,7 +261,7 @@
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svrshl", functions::svrshl,
+				    shapes::binary_int_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	return f.redirect_call (instance);
+      }
+  }
+@@ -290,7 +290,7 @@ public:
+				    -wi::to_wide (amount));
+	function_instance instance ("svasr", functions::svasr,
+				    shapes::binary_uint_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	if (f.type_suffix (0).unsigned_p)
+	  {
+	    instance.base_name = "svlsr";
+@@ -322,7 +322,7 @@ public:
+	 that we can use for sensible shift amounts.  */
+	function_instance instance ("svlsl", functions::svlsl,
+				    shapes::binary_uint_opt_n, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = as_a <gcall *> (f.redirect_call (instance));
+	gimple_call_set_arg (call, 2, amount);
+	return call;
+@@ -335,7 +335,7 @@ public:
+				    -wi::to_wide (amount));
+	function_instance instance ("svrshr", functions::svrshr,
+				    shapes::shift_right_imm, MODE_n,
+-				    f.type_suffix_ids, f.pred);
++				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = as_a <gcall *> (f.redirect_call (instance));
+	gimple_call_set_arg (call, 2, amount);
+	return call;
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index dde01f676..dc3fd80da 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
+     0, VOIDmode }
+ };
+
++CONSTEXPR const group_suffix_info group_suffixes[] = {
++#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \
++  { "_" #NAME, VG, VECTORS_PER_TUPLE },
++#include "aarch64-sve-builtins.def"
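With the group component in place, intrinsic names gain one suffix per component, which is how the tuple forms of svreinterpret mentioned in the commit message get their names from the x2/x3/x4 suffixes defined above. An illustrative sketch of the concatenation (not the GCC implementation of function_builder::get_name):

  #include <string>

  /* Suffix strings are appended in a fixed order; an empty string means
     the component is unused for this function.  */
  static std::string
  intrinsic_name (const char *base, const char *type_sfx,
                  const char *group_sfx, const char *pred_sfx)
  {
    return std::string (base) + type_sfx + group_sfx + pred_sfx;
  }

  /* intrinsic_name ("svreinterpret", "_s32", "_x2", "") yields
     "svreinterpret_s32_x2".  */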
View file
_service:tar_scm:0164-LoongArch-Define-ISA-versions.patch
Added
@@ -0,0 +1,1016 @@ +From 66c8369ff9e5987c14786692cf6fd945a94273a1 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:47 +0800 +Subject: PATCH 164/188 LoongArch: Define ISA versions + +These ISA versions are defined as -march= parameters and +are recommended for building binaries for distribution. + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Make la64v1.0 the default ISA preset of the lp64d ABI. + * config/loongarch/genopts/loongarch-strings: Define la64v1.0, la64v1.1. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-c.cc (LARCH_CPP_SET_PROCESSOR): Likewise. + (loongarch_cpu_cpp_builtins): Likewise. + * config/loongarch/loongarch-cpu.cc (get_native_prid): Likewise. + (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.cc (array_tune): Likewise. + * config/loongarch/loongarch-def.h: Likewise. + * config/loongarch/loongarch-driver.cc (driver_set_m_parm): Likewise. + (driver_get_normalized_m_opts): Likewise. + * config/loongarch/loongarch-opts.cc (default_tune_for_arch): Likewise. + (TUNE_FOR_ARCH): Likewise. + (arch_str): Likewise. + (loongarch_target_option_override): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464): Likewise. + (TARGET_uARCH_LA664): Likewise. + * config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT): Likewise. + (STR_ARCH_ABI_DEFAULT): Likewise. + (STR_TUNE_GENERIC): Likewise. + (STR_ARCH_LA64V1_0): Likewise. + (STR_ARCH_LA64V1_1): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): Likewise. + (loongarch_asm_code_end): Likewise. + * config/loongarch/loongarch.opt: Likewise. + * doc/invoke.texi: Likewise. +--- + gcc/config.gcc | 34 ++++---- + .../loongarch/genopts/loongarch-strings | 5 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 43 ++++++++-- + gcc/config/loongarch/loongarch-c.cc | 37 +++------ + gcc/config/loongarch/loongarch-cpu.cc | 35 ++++---- + gcc/config/loongarch/loongarch-def.cc | 83 +++++++++++++------ + gcc/config/loongarch/loongarch-def.h | 37 ++++++--- + gcc/config/loongarch/loongarch-driver.cc | 8 +- + gcc/config/loongarch/loongarch-opts.cc | 66 +++++++++++---- + gcc/config/loongarch/loongarch-opts.h | 4 +- + gcc/config/loongarch/loongarch-str.h | 5 +- + gcc/config/loongarch/loongarch.cc | 11 +-- + gcc/config/loongarch/loongarch.opt | 43 ++++++++-- + gcc/doc/invoke.texi | 57 ++++++++----- + 14 files changed, 300 insertions(+), 168 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 1db558d4c..c6820d0f1 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5035,7 +5035,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- "" | abi-default | loongarch64 | la4664) ;; # OK, append here. ++ "" | la64v1.01 | abi-default | loongarch64 | la4664) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 
1>&2
+@@ -5082,10 +5082,18 @@ case "${target}" in
+
+	# Infer ISA-related default options from the ABI: pass 1
+	case ${abi_base}/${abi_ext} in
+-	lp64*/base)
+	    # architectures that support lp64* ABI
+-	    arch_pattern="native|abi-default|loongarch64|la[46]64"
+-	    # default architecture for lp64* ABI
++	lp64d/base)
++	    # architectures that support lp64* ABI
++	    arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
++
++	    # default architecture for lp64d ABI
++	    arch_default="la64v1.0"
++	    ;;
++	lp64[fs]/base)
++	    # architectures that support lp64* ABI
++	    arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
++
++	    # default architecture for lp64[fs] ABI
+	    arch_default="abi-default"
+	    ;;
+	*)
+@@ -5157,15 +5165,7 @@ case "${target}" in
+
+
+	# Check default with_tune configuration using with_arch.
+-	case ${with_arch} in
+-	loongarch64)
+-	    tune_pattern="native|abi-default|loongarch64|la[46]64"
+-	    ;;
+-	*)
+-	    # By default, $with_tune == $with_arch
+-	    tune_pattern="*"
+-	    ;;
+-	esac
++	tune_pattern="native|generic|loongarch64|la[46]64"
+
+	case ${with_tune} in
+	"") ;; # OK
+@@ -5215,7 +5215,7 @@ case "${target}" in
+		# Fixed: use the default gcc configuration for all multilib
+		# builds by default.
+		with_multilib_default="" ;;
+-	arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
++	arch,native|arch,la64v1.[01]|arch,loongarch64|arch,la[46]64) # OK, append here.
+		with_multilib_default="/march=${component}" ;;
+	arch,*)
+		with_multilib_default="/march=abi-default"
+@@ -5315,7 +5315,7 @@ case "${target}" in
+		if test x${parse_state} = x"arch"; then
+			# -march option
+			case ${component} in
+-			native | abi-default | loongarch64 | la[46]64) # OK, append here.
++			native | abi-default | la64v1.[01] | loongarch64 | la[46]64) # OK, append here.
+				# Append -march spec for each multilib variant.
+				loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
+				parse_state="opts"
+@@ -5858,7 +5858,7 @@ case ${target} in
+	# See macro definitions from loongarch-opts.h and loongarch-cpu.h.
+
+	# Architecture
+-	tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(echo ${with_arch} | tr a-z- A-Z_)"
++	tm_defines="${tm_defines} DEFAULT_CPU_ARCH=ARCH_$(echo ${with_arch} | tr a-z.- A-Z__)"
+
+	# Base ABI type
+	tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(echo ${abi_base} | tr a-z- A-Z_)"
+@@ -5870,7 +5870,7 @@ case ${target} in
+
+	# Microarchitecture
+	if test x${with_tune} != x; then
+-		tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(echo ${with_tune} | tr a-z- A-Z_)"
++		tm_defines="${tm_defines} DEFAULT_CPU_TUNE=TUNE_$(echo ${with_tune} | tr a-z.- A-Z__)"
+	fi
+
+	# FPU adjustment
+diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
+index 99fd4e7cd..fd2f9b4f3 100644
+--- a/gcc/config/loongarch/genopts/loongarch-strings
++++ b/gcc/config/loongarch/genopts/loongarch-strings
+@@ -23,10 +23,13 @@ OPTSTR_ARCH	arch
+ OPTSTR_TUNE	tune
+
+ STR_CPU_NATIVE	native
+-STR_CPU_ABI_DEFAULT	abi-default
++STR_ARCH_ABI_DEFAULT	abi-default
++STR_TUNE_GENERIC	generic
+ STR_CPU_LOONGARCH64	loongarch64
+ STR_CPU_LA464	la464
+ STR_CPU_LA664	la664
++STR_ARCH_LA64V1_0	la64v1.0
++STR_ARCH_LA64V1_1	la64v1.1
+
+ # Base architecture
+ STR_ISA_BASE_LA64	la64
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index f3d53f03c..0ecd10922 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -95,30 +95,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit).
+ + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++Enum(arch_type) String(@@STR_CPU_NATIVE@@) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(@@STR_ARCH_ABI_DEFAULT@@) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(@@STR_CPU_LOONGARCH64@@) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++Enum(arch_type) String(@@STR_CPU_LA464@@) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++Enum(arch_type) String(@@STR_CPU_LA664@@) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_0@@) Value(ARCH_LA64V1_0)
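The net effect of the config.gcc hunks above: the default -march for the lp64d ABI is now the la64v1.0 preset, while lp64f/lp64s fall back to abi-default. Condensed into one illustrative helper (not part of the patch):

  #include <string.h>

  /* Mirror of the "Infer ISA-related default options from the ABI" logic:
     only lp64d gets the new versioned default.  */
  static const char *
  default_march_for_abi (const char *abi_base)
  {
    return strcmp (abi_base, "lp64d") == 0 ? "la64v1.0" : "abi-default";
  }

A distribution build would then configure with --with-arch=la64v1.0, or users would pass -march=la64v1.0 / -march=la64v1.1 directly, the presets the commit message recommends for distributable binaries.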
View file
_service:tar_scm:0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
Added
@@ -0,0 +1,230 @@
+From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:20 +0000
+Subject: [PATCH 066/157] [Backport][SME] aarch64: Add sve_type to SVE builtins
+ code
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92
+
+Until now, the SVE ACLE code had mostly been able to represent
+individual SVE arguments with just an element type suffix (s32, u32,
+etc.).  However, the SME2 ACLE provides many overloaded intrinsics
+that operate on tuples rather than single vectors.  This patch
+therefore adds a new type (sve_type) that combines an element
+type suffix with a vector count.  This is enough to uniquely
+represent all SVE ACLE types.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h (sve_type): New struct.
+	(sve_type::operator==): New function.
+	(function_resolver::get_vector_type): Delete.
+	(function_resolver::report_no_such_form): Take an sve_type rather
+	than a type_suffix_index.
+	* config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New
+	function.
+	(function_resolver::get_vector_type): Delete.
+	(function_resolver::report_no_such_form): Take an sve_type rather
+	than a type_suffix_index.
+	(find_sve_type): New function, split out from...
+	(function_resolver::infer_vector_or_tuple_type): ...here.
+---
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++----------
+ gcc/config/aarch64/aarch64-sve-builtins.h  | 37 ++++++++-
+ 2 files changed, 88 insertions(+), 42 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index dc3fd80da..cc676bfe1 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type)
+   return NUM_TYPE_SUFFIXES;
+ }
+
++/* Return the vector type associated with TYPE.  */
++static tree
++get_vector_type (sve_type type)
++{
++  auto vector_type = type_suffixes[type.type].vector_type;
++  return acle_vector_types[type.num_vectors - 1][vector_type];
++}
++
+ /* Report an error against LOCATION that the user has tried to use
+    function FNDECL when extension EXTENSION is disabled.  */
+ static void
+@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location,
+ {
+ }
+
+-/* Return the vector type associated with type suffix TYPE.  */
+-tree
+-function_resolver::get_vector_type (type_suffix_index type)
+-{
+-  return acle_vector_types[0][type_suffixes[type].vector_type];
+-}
+-
+ /* Return the <stdint.h> name associated with TYPE.  Using the <stdint.h>
+    name should be more user-friendly than the underlying canonical type,
+    since it makes the signedness and bitwidth explicit.  */
+@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i)
+	  || SCALAR_FLOAT_TYPE_P (type));
+ }
+
+-/* Report that the function has no form that takes type suffix TYPE.
++/* Report that the function has no form that takes type TYPE.
+    Return error_mark_node.
*/ + tree +-function_resolver::report_no_such_form (type_suffix_index type) ++function_resolver::report_no_such_form (sve_type type) + { + error_at (location, "%qE has no form that takes %qT arguments", + fndecl, get_vector_type (type)); +@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno, + return type; + } + ++/* If TYPE is an SVE predicate or vector type, or a tuple of such a type, ++ return the associated sve_type, otherwise return an invalid sve_type. */ ++static sve_type ++find_sve_type (const_tree type) ++{ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) ++ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) ++ { ++ vector_type_index type_i = type_suffixessuffix_i.vector_type; ++ tree this_type = acle_vector_typessize_itype_i; ++ if (this_type && matches_type_p (this_type, type)) ++ return { type_suffix_index (suffix_i), size_i + 1 }; ++ } ++ ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno, + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + +- /* A linear search should be OK here, since the code isn't hot and +- the number of types is only small. */ +- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) +- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) +- { +- vector_type_index type_i = type_suffixessuffix_i.vector_type; +- tree type = acle_vector_typessize_itype_i; +- if (type && matches_type_p (type, actual)) +- { +- if (size_i + 1 == num_vectors) +- return type_suffix_index (suffix_i); +- +- if (num_vectors == 1) +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects a single SVE vector rather than a tuple", +- actual, argno + 1, fndecl); +- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing single vector %qT to argument %d" +- " of %qE, which expects a tuple of %d vectors", +- actual, argno + 1, fndecl, num_vectors); +- else +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing %qT to argument %d of %qE, which" +- " expects a tuple of %d vectors", actual, argno + 1, +- fndecl, num_vectors); +- return NUM_TYPE_SUFFIXES; +- } +- } ++ if (auto sve_type = find_sve_type (actual)) ++ { ++ if (sve_type.num_vectors == num_vectors) ++ return sve_type.type; ++ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ actual, argno + 1, fndecl); ++ else if (sve_type.num_vectors == 1 ++ && sve_type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ actual, argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", actual, argno + 1, ++ fndecl, num_vectors); ++ return NUM_TYPE_SUFFIXES; ++ } + + if (num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 374c57e93..f4f2c415f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -263,6 +263,40 @@ struct group_suffix_info + unsigned int vectors_per_tuple; + }; + ++/* Represents an SVE vector, predicate, tuple of vectors, or tuple of ++ predicates. There is also a representation of "no type"/"invalid type". */ ++struct sve_type ++{ ++ sve_type () = default; ++ sve_type (type_suffix_index type) : type (type), num_vectors (1) {} ++ sve_type (type_suffix_index type, unsigned int num_vectors) ++ : type (type), num_vectors (num_vectors) {} ++ ++ /* Return true if the type is valid. */ ++ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; } ++ ++ bool operator== (const sve_type &) const; ++ bool operator!= (const sve_type &x) const { return !operator== (x); } ++ ++ /* This is one of: ++ ++ - TYPE_SUFFIX_b for svbool_t-based types ++ - TYPE_SUFFIX_c for svcount_t-based types ++ - the type suffix of a data element for SVE data vectors and tuples ++ - NUM_TYPE_SUFFIXES for invalid types. */ ++ type_suffix_index type = NUM_TYPE_SUFFIXES; ++
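sve_type above collapses "which element type" and "how many vectors" into one value, which is what lets resolution distinguish svint32_t from svint32x2_t. A usage sketch, assuming the header above is in scope (TYPE_SUFFIX_s32 is one of the real suffix enumerators; the snippet itself is illustrative):

  static void
  sve_type_demo ()
  {
    sve_type single (TYPE_SUFFIX_s32);    /* svint32_t:   num_vectors == 1 */
    sve_type pair (TYPE_SUFFIX_s32, 2);   /* svint32x2_t: num_vectors == 2 */
    sve_type invalid;                     /* default: NUM_TYPE_SUFFIXES    */

    /* operator bool separates "no type" from any valid type, and
       single != pair even though both share the same element suffix.  */
    if (invalid || single == pair)
      __builtin_unreachable ();
  }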
View file
_service:tar_scm:0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch
Added
@@ -0,0 +1,678 @@ +From 9af73fb7213d5c10b3683465e6682ad20f5abe64 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:48 +0800 +Subject: PATCH 165/188 LoongArch: Define builtin macros for ISA evolutions + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-evolution.o. + * config/loongarch/genopts/genstr.sh: Enable generation of + loongarch-evolution.cc,h. + * config/loongarch/t-loongarch: Likewise. + * config/loongarch/genopts/gen-evolution.awk: New file. + * config/loongarch/genopts/isa-evolution.in: Mark ISA version + of introduction for each ISA evolution feature. + * config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): + Define builtin macros for enabled ISA evolutions and the ISA + version. + * config/loongarch/loongarch-cpu.cc: Use loongarch-evolution.h. + * config/loongarch/loongarch.h: Likewise. + * config/loongarch/loongarch-cpucfg-map.h: Delete. + * config/loongarch/loongarch-evolution.cc: New file. + * config/loongarch/loongarch-evolution.h: New file. + * config/loongarch/loongarch-opts.h (ISA_HAS_FRECIPE): Define. + (ISA_HAS_DIV32): Likewise. + (ISA_HAS_LAM_BH): Likewise. + (ISA_HAS_LAMCAS): Likewise. + (ISA_HAS_LD_SEQ_SA): Likewise. +--- + gcc/config.gcc | 2 +- + .../loongarch/genopts/gen-evolution.awk | 230 ++++++++++++++++++ + gcc/config/loongarch/genopts/genstr.sh | 82 ++----- + gcc/config/loongarch/genopts/isa-evolution.in | 10 +- + gcc/config/loongarch/loongarch-c.cc | 23 ++ + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-evolution.cc | 60 +++++ + ...rch-cpucfg-map.h => loongarch-evolution.h} | 46 +++- + gcc/config/loongarch/loongarch-opts.h | 11 - + gcc/config/loongarch/loongarch.h | 1 + + gcc/config/loongarch/t-loongarch | 26 +- + 11 files changed, 398 insertions(+), 95 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/gen-evolution.awk + create mode 100644 gcc/config/loongarch/loongarch-evolution.cc + rename gcc/config/loongarch/{loongarch-cpucfg-map.h => loongarch-evolution.h} (52%) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c6820d0f1..a405e6d2e 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -458,7 +458,7 @@ loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" +- extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_options="${extra_options} g.opt fused-madd.opt" + ;; +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +new file mode 100644 +index 000000000..4d105afa9 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -0,0 +1,230 @@ ++#!/usr/bin/gawk ++# ++# A simple script that generates loongarch-evolution.h ++# from genopts/isa-evolution.in ++# ++# Copyright (C) 2021-2024 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. 
++#
++# GCC is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++# License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.
++
++BEGIN {
++    # isa_version_major[]
++    # isa_version_minor[]
++    # cpucfg_word[]
++    # cpucfg_bit_in_word[]
++    # name_capitalized[]
++    # comment[]
++}
++
++{
++    cpucfg_word[NR] = $1
++    cpucfg_bit_in_word[NR] = $2
++    name[NR] = gensub(/-/, "_", "g", $3)
++    name_capitalized[NR] = toupper(name[NR])
++    isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4)
++    isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4)
++
++    $1 = $2 = $3 = $4 = ""
++    sub (/^\s*/, "")
++    comment[NR] = $0
++}
++
++function copyright_header(from_year,to_year)
++{
++    print "   Copyright (C) " from_year "-" to_year \
++          " Free Software Foundation, Inc."
++    print ""
++    print "This file is part of GCC."
++    print ""
++    print "GCC is free software; you can redistribute it and/or modify"
++    print "it under the terms of the GNU General Public License as published by"
++    print "the Free Software Foundation; either version 3, or (at your option)"
++    print "any later version."
++    print ""
++    print "GCC is distributed in the hope that it will be useful,"
++    print "but WITHOUT ANY WARRANTY; without even the implied warranty of"
++    print "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"
++    print "GNU General Public License for more details."
++    print ""
++    print "You should have received a copy of the GNU General Public License"
++    print "along with GCC; see the file COPYING3.  If not see"
++    print "<http://www.gnu.org/licenses/>."
++}
++
++function gen_cpucfg_map()
++{
++    print "static constexpr struct {"
++    print "  int cpucfg_word;"
++    print "  unsigned int cpucfg_bit;"
++    print "  HOST_WIDE_INT isa_evolution_bit;"
++    print "} cpucfg_map[] = {"
++
++    for (i = 1; i <= NR; i++)
++        printf ("  { %d, 1u << %d, OPTION_MASK_ISA_%s },\n",
++                cpucfg_word[i], cpucfg_bit_in_word[i], name_capitalized[i])
++
++    print "};"
++}
++
++function gen_cpucfg_useful_idx()
++{
++    split("0 1 2 16 17 18 19", init_useful_idx)
++
++    delete idx_bucket
++
++    for (i in init_useful_idx)
++        idx_bucket[init_useful_idx[i]] = 1
++    delete init_useful_idx
++
++    for (i in cpucfg_word)
++        idx_bucket[cpucfg_word[i]] = 1
++
++    delete idx_list
++    for (i in idx_bucket)
++        idx_list[length(idx_list)-1] = i+0
++    delete idx_bucket
++
++    asort (idx_list)
++
++    print "static constexpr int cpucfg_useful_idx[] = {"
++    for (i in idx_list)
++        printf("  %d,\n", idx_list[i])
++    print "};"
++
++    print ""
++
++    printf ("static constexpr int N_CPUCFG_WORDS = %d;\n",
++            idx_list[length(idx_list)] + 1)
++
++    delete idx_list
++}
++
++function gen_evolution_decl()
++{
++    print "/* ISA evolution features */"
++    print "enum {"
++
++    for (i = 1; i <= NR; i++)
++        print "  EVO_" name_capitalized[i] " = " i - 1 ","
++
++    print "  N_EVO_FEATURES = " NR
++    print "};"
++    print ""
++
++    print "/* Condition macros */"
++    for (i = 1; i <= NR; i++)
++        printf ("#define ISA_HAS_%s \\\n" \
++                "  (la_target.isa.evolution & OPTION_MASK_ISA_%s)\n",
++                name_capitalized[i], name_capitalized[i])
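The generated cpucfg_map table pairs a CPUCFG word index and bit with the corresponding ISA-evolution option mask, so native CPU detection reduces to OR-ing the masks whose hardware bits are set. A sketch of that consumer loop (illustrative; only the table shape comes from the awk output above):

  struct cpucfg_map_entry
  {
    int cpucfg_word;
    unsigned int cpucfg_bit;
    long long isa_evolution_bit;
  };

  static long long
  detect_evolution (const unsigned int *cpucfg_words,
                    const cpucfg_map_entry *map, int n)
  {
    long long mask = 0;
    for (int i = 0; i < n; i++)
      if (cpucfg_words[map[i].cpucfg_word] & map[i].cpucfg_bit)
        mask |= map[i].isa_evolution_bit;  /* hardware bit set -> feature on */
    return mask;
  }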
View file
_service:tar_scm:0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
Added
@@ -0,0 +1,1474 @@ +From 21839879d5f00db48cdacd472044a9bd4e23a2c6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:20 +0000 +Subject: PATCH 067/157 BackportSME aarch64: Generalise some SVE ACLE + error messages + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb01ef94ff5096b907639aa3a1d77850921e7d37 + +The current SVE ACLE function-resolution diagnostics assume +that a function has a fixed choice between vectors or tuples +of vectors. If an argument was not an SVE type at all, the +error message said the function "expects an SVE vector type" +or "expects an SVE tuple type". + +This patch generalises the error to cope with cases where +an argument can be either a vector or a tuple. It also splits +out the diagnostics for mismatched tuple sizes, so that they +can be reused by later patches. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::infer_sve_type): New member function. + (function_resolver::report_incorrect_num_vectors): Likewise. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::infer_sve_type): New function,. + (function_resolver::report_incorrect_num_vectors): New function, + split out from... + (function_resolver::infer_vector_or_tuple_type): ...here. Use + infer_sve_type. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Update expected error + messages. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 87 ++++++++++++------- + gcc/config/aarch64/aarch64-sve-builtins.h | 3 + + .../aarch64/sve/acle/general-c/adr_index_1.c | 6 +- + .../aarch64/sve/acle/general-c/adr_offset_1.c | 6 +- + .../aarch64/sve/acle/general-c/binary_1.c | 2 +- + .../sve/acle/general-c/binary_int_opt_n.c | 2 +- + .../sve/acle/general-c/binary_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 2 +- + .../aarch64/sve/acle/general-c/binary_n_1.c | 2 +- + .../acle/general-c/binary_narrowb_opt_n_1.c | 2 +- + .../acle/general-c/binary_narrowt_opt_n_1.c | 4 +- + .../sve/acle/general-c/binary_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_opt_n_3.c | 2 +- + .../sve/acle/general-c/binary_rotate_1.c | 4 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +- + .../sve/acle/general-c/binary_uint64_n_1.c | 2 +- + .../acle/general-c/binary_uint64_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_uint_1.c | 2 +- + .../sve/acle/general-c/binary_uint_n_1.c | 2 +- + .../sve/acle/general-c/binary_uint_opt_n_1.c | 2 +- + .../sve/acle/general-c/binary_wide_1.c | 8 +- + .../sve/acle/general-c/binary_wide_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/clast_1.c | 4 +- + .../aarch64/sve/acle/general-c/compare_1.c | 4 +- + .../sve/acle/general-c/compare_opt_n_1.c | 2 +- + .../sve/acle/general-c/compare_wide_opt_n_1.c | 2 +- + .../sve/acle/general-c/count_vector_1.c | 2 +- + .../aarch64/sve/acle/general-c/create_1.c | 4 +- + .../aarch64/sve/acle/general-c/create_3.c | 4 +- + .../aarch64/sve/acle/general-c/create_5.c | 4 +- + .../aarch64/sve/acle/general-c/fold_left_1.c | 4 +- + .../sve/acle/general-c/inc_dec_pred_1.c | 2 +- + .../aarch64/sve/acle/general-c/mmla_1.c | 10 +-- + .../acle/general-c/prefetch_gather_offset_2.c | 2 +- + .../aarch64/sve/acle/general-c/reduction_1.c | 2 +- + .../sve/acle/general-c/reduction_wide_1.c | 2 +- + .../general-c/shift_right_imm_narrowb_1.c | 2 +- + .../shift_right_imm_narrowb_to_uint_1.c | 2 +- + .../general-c/shift_right_imm_narrowt_1.c | 4 +- + .../shift_right_imm_narrowt_to_uint_1.c | 4 +- + 
.../aarch64/sve/acle/general-c/store_1.c | 2 +- + .../aarch64/sve/acle/general-c/store_2.c | 2 +- + .../acle/general-c/store_scatter_offset_1.c | 4 +- + .../sve/acle/general-c/ternary_bfloat16_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_lane_1.c | 2 +- + .../general-c/ternary_bfloat16_lanex2_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_opt_n_1.c | 2 +- + .../general-c/ternary_intq_uintq_lane_1.c | 6 +- + .../general-c/ternary_intq_uintq_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_lane_1.c | 6 +- + .../acle/general-c/ternary_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_long_lane_1.c | 6 +- + .../sve/acle/general-c/ternary_long_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_qq_lane_1.c | 6 +- + .../acle/general-c/ternary_qq_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_qq_opt_n_2.c | 4 +- + .../sve/acle/general-c/ternary_qq_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_rotate_1.c | 6 +- + .../general-c/ternary_shift_right_imm_1.c | 4 +- + .../sve/acle/general-c/ternary_uint_1.c | 6 +- + .../sve/acle/general-c/ternary_uintq_intq_1.c | 6 +- + .../general-c/ternary_uintq_intq_lane_1.c | 6 +- + .../general-c/ternary_uintq_intq_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/tmad_1.c | 4 +- + .../aarch64/sve/acle/general-c/unary_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_2.c | 2 +- + .../sve/acle/general-c/unary_convert_1.c | 2 +- + .../sve/acle/general-c/unary_convert_2.c | 2 +- + .../acle/general-c/unary_convert_narrowt_1.c | 2 +- + .../sve/acle/general-c/unary_narrowb_1.c | 2 +- + .../acle/general-c/unary_narrowb_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_narrowt_1.c | 4 +- + .../acle/general-c/unary_narrowt_to_uint_1.c | 4 +- + .../sve/acle/general-c/unary_to_int_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_2.c | 2 +- + .../sve/acle/general-c/unary_to_uint_3.c | 2 +- + .../aarch64/sve/acle/general-c/unary_uint_1.c | 2 +- + .../sve/acle/general-c/unary_widen_1.c | 4 +- + 81 files changed, 195 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index cc676bfe1..4e94e3633 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1228,6 +1228,32 @@ function_resolver::scalar_argument_p (unsigned int i) + || SCALAR_FLOAT_TYPE_P (type)); + } + ++/* Report that argument ARGNO was expected to have NUM_VECTORS vectors. ++ TYPE is the type that ARGNO actually has. */ ++void ++function_resolver::report_incorrect_num_vectors (unsigned int argno, ++ sve_type type, ++ unsigned int num_vectors) ++{ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ get_vector_type (type), argno + 1, fndecl); ++ else if (type.num_vectors == 1 ++ && type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ get_vector_type (type), argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", get_vector_type (type), ++ argno + 1, fndecl, num_vectors); ++} ++ + /* Report that the function has no form that takes type TYPE. + Return error_mark_node. */ + tree +@@ -1372,6 +1398,30 @@ find_sve_type (const_tree type) + return {}; + } + ++/* Require argument ARGNO to be an SVE type (i.e. something that can be ++ represented by sve_type). Return the (valid) type if it is, otherwise ++ report an error and return an invalid type. */ ++sve_type ++function_resolver::infer_sve_type (unsigned int argno) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return {}; ++ ++ if (sve_type type = find_sve_type (actual)) ++ return type; ++ ++ if (scalar_argument_p (argno)) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type rather than a scalar type", ++ actual, argno + 1, fndecl); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type", ++ actual, argno + 1, fndecl); ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1380,41 +1430,14 @@ type_suffix_index + function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) + { +- tree actual = get_argument_type (argno); +- if (actual == error_mark_node) ++ auto type = infer_sve_type (argno); ++ if (!type) + return NUM_TYPE_SUFFIXES; + +- if (auto sve_type = find_sve_type (actual)) +- { +- if (sve_type.num_vectors == num_vectors) +- return sve_type.type;
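For reference, a sketch of the diagnostics this resolver change produces, based on the testsuite expectations updated by the patch (not a test from the tree; the exact wording of the third case is inferred from report_incorrect_num_vectors, so treat it as approximate):

/* Compile for an SVE target; each call is expected to be rejected.  */
#include <arm_sve.h>

void
f (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2)
{
  svzip1 (u8, 0);     /* passing 'int' to argument 2 of 'svzip1', which
			 expects an SVE type rather than a scalar type */
  svget2 (u8, 0);     /* passing 'svuint8_t' to argument 1 of 'svget2',
			 which expects a tuple of 2 vectors */
  svabs_x (pg, u8x2); /* passing 'svuint8x2_t' to argument 2 of 'svabs_x',
			 which expects a single SVE vector rather than
			 a tuple */
}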
_service:tar_scm:0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch
Added
@@ -0,0 +1,120 @@
+From 3bb46830b0f92f54d1ef529796348c0a86504065 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 26 Apr 2024 15:59:11 +0800
+Subject: [PATCH 166/188] LoongArch: Add constraints for bit string operation
+ define_insn_and_split's [PR114861]
+
+Without the constraints, the compiler attempts to use a stack slot as the
+target, causing an ICE building the kernel with -Os:
+
+    drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c:3144:1:
+       error: could not split insn
+    (insn:TI 1764 67 1745
+       (set (mem/c:DI (reg/f:DI 3 $r3) [707 %sfp+-80 S8 A64])
+            (and:DI (reg/v:DI 28 $r28 [orig:422 raster_config ] [422])
+                    (const_int -50331649 [0xfffffffffcffffff])))
+       "drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c":1386:21 111
+       {*bstrins_di_for_mask}
+       (nil))
+
+Add these constraints to fix the issue.
+
+gcc/ChangeLog:
+
+	PR target/114861
+	* config/loongarch/loongarch.md (bstrins_<mode>_for_mask): Add
+	constraints for operands.
+	(bstrins_<mode>_for_ior_mask): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/114861
+	* gcc.target/loongarch/pr114861.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 16 ++++----
+ gcc/testsuite/gcc.target/loongarch/pr114861.c | 39 +++++++++++++++++++
+ 2 files changed, 47 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr114861.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 95beb88fe..20494ce8a 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1543,9 +1543,9 @@
+    (set_attr "mode" "<MODE>")])
+ 
+ (define_insn_and_split "*bstrins_<mode>_for_mask"
+-  [(set (match_operand:GPR 0 "register_operand")
+-	(and:GPR (match_operand:GPR 1 "register_operand")
+-		 (match_operand:GPR 2 "ins_zero_bitmask_operand")))]
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(and:GPR (match_operand:GPR 1 "register_operand" "r")
++		 (match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))]
+   ""
+   "#"
+   ""
+@@ -1563,11 +1563,11 @@
+ })
+ 
+ (define_insn_and_split "*bstrins_<mode>_for_ior_mask"
+-  [(set (match_operand:GPR 0 "register_operand")
+-	(ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
+-			  (match_operand:GPR 2 "const_int_operand"))
+-		 (and:GPR (match_operand:GPR 3 "register_operand")
+-			  (match_operand:GPR 4 "const_int_operand"))))]
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r")
++			  (match_operand:GPR 2 "const_int_operand" "i"))
++		 (and:GPR (match_operand:GPR 3 "register_operand" "r")
++			  (match_operand:GPR 4 "const_int_operand" "i"))))]
+   "loongarch_pre_reload_split ()
+    && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+   "#"
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr114861.c b/gcc/testsuite/gcc.target/loongarch/pr114861.c
+new file mode 100644
+index 000000000..e6507c406
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr114861.c
+@@ -0,0 +1,39 @@
++/* PR114861: ICE building the kernel with -Os
++   Reduced from linux/fs/ntfs3/attrib.c at revision c942a0cd3603.  */
++/* { dg-do compile } */
++/* { dg-options "-Os -march=loongarch64 -msoft-float -mabi=lp64s" } */
++
++long evcn, attr_collapse_range_vbo, attr_collapse_range_bytes;
++unsigned short flags;
++int attr_collapse_range_ni_0_0;
++int *attr_collapse_range_mi;
++unsigned attr_collapse_range_svcn, attr_collapse_range_vcn1;
++void ni_insert_nonresident (unsigned, unsigned short, int **);
++int mi_pack_runs (int);
++int
++attr_collapse_range (void)
++{
++  _Bool __trans_tmp_1;
++  int run = attr_collapse_range_ni_0_0;
++  unsigned evcn1, vcn, end;
++  short a_flags = flags;
++  __trans_tmp_1 = flags & (32768 | 1);
++  if (__trans_tmp_1)
++    return 2;
++  vcn = attr_collapse_range_vbo;
++  end = attr_collapse_range_bytes;
++  evcn1 = evcn;
++  for (;;)
++    if (attr_collapse_range_svcn >= end)
++      {
++	unsigned eat, next_svcn = mi_pack_runs (42);
++	attr_collapse_range_vcn1 = (vcn ? vcn : attr_collapse_range_svcn);
++	eat = (0 < end) - attr_collapse_range_vcn1;
++	mi_pack_runs (run - eat);
++	if (next_svcn + eat)
++	  ni_insert_nonresident (evcn1 - eat - next_svcn, a_flags,
++				 &attr_collapse_range_mi);
++      }
++    else
++      return 42;
++}
+--
+2.43.0
+
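A minimal sketch of the source shape that reaches *bstrins_di_for_mask (hypothetical function name; the mask is the one from the ICE above, with a single contiguous run of zero bits):

/* With the "=r"/"r"/"i" constraints the destination is forced into a
   register, so the post-reload split into bstrins always succeeds,
   even at -Os.  */
unsigned long
clear_field (unsigned long x)
{
  return x & 0xfffffffffcffffffUL;	/* bits [25:24] are the zero field */
}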
_service:tar_scm:0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
Added
@@ -0,0 +1,698 @@ +From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:21 +0000 +Subject: PATCH 068/157 BackportSME aarch64: Replace vague "previous + arguments" message + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875 + +If an SVE ACLE intrinsic requires two arguments to have the +same type, the C resolver would report mismatches as "argument N +has type T2, but previous arguments had type T1". This patch makes +the message say which argument had type T1. + +This is needed to give decent error messages for some SME cases. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::require_matching_vector_type): Add a parameter + that specifies the number of the earlier argument that is being + matched against. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::require_matching_vector_type): Likewise. + (require_derived_vector_type): Update calls accordingly. + (function_resolver::resolve_unary): Likewise. + (function_resolver::resolve_uniform): Likewise. + (function_resolver::resolve_uniform_opt_n): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (binary_long_lane_def::resolve): Likewise. + (clast_def::resolve, ternary_uint_def::resolve): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous + arguments had" with "but argument N had". +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------ + gcc/config/aarch64/aarch64-sve-builtins.h | 3 +- + .../aarch64/sve/acle/general-c/binary_1.c | 6 ++-- + .../sve/acle/general-c/binary_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++--- + .../sve/acle/general-c/binary_opt_n_2.c | 14 +++++----- + .../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------ + .../sve/acle/general-c/binary_rotate_1.c | 2 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +-- + .../aarch64/sve/acle/general-c/clast_1.c | 2 +- + .../aarch64/sve/acle/general-c/compare_1.c | 14 +++++----- + .../sve/acle/general-c/compare_opt_n_1.c | 14 +++++----- + .../aarch64/sve/acle/general-c/create_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_3.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_5.c | 6 ++-- + .../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++----- + .../sve/acle/general-c/ternary_lane_1.c | 4 +-- + .../acle/general-c/ternary_lane_rotate_1.c | 4 +-- + .../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++---------- + .../sve/acle/general-c/ternary_rotate_1.c | 4 +-- + .../general-c/ternary_shift_right_imm_1.c | 6 ++-- + .../sve/acle/general-c/ternary_uint_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/tmad_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_1.c | 8 +++--- + .../aarch64/sve/acle/general-c/undeclared_2.c | 2 +- + 29 files changed, 112 insertions(+), 110 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 3ecef026c..40aa418e0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0> + type_suffix_index type, result_type; + if (!r.check_gp_argument (3, i, 
nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_integer_immediate (i + 2) + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; +@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0> + { + type_suffix_index type; + if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type)) ++ || !r.require_matching_vector_type (i + 1, i, type)) + return error_mark_node; + return r.resolve_to (MODE_none, type); + } +@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0> + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 4e94e3633..1545fd78d 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno, + return true; + } + +-/* Like require_vector_type, but TYPE is inferred from previous arguments ++/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO + rather than being a fixed part of the function signature. This changes + the nature of the error messages. */ + bool + function_resolver::require_matching_vector_type (unsigned int argno, ++ unsigned int first_argno, + type_suffix_index type) + { + type_suffix_index new_type = infer_vector_type (argno); +@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno, + if (type != new_type) + { + error_at (location, "passing %qT to argument %d of %qE, but" +- " previous arguments had type %qT", ++ " argument %d had type %qT", + get_vector_type (new_type), argno + 1, fndecl, +- get_vector_type (type)); ++ first_argno + 1, get_vector_type (type)); + return false; + } + return true; +@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno, + { + /* There's no need to resolve this case out of order. */ + gcc_assert (argno > first_argno); +- return require_matching_vector_type (argno, first_type); ++ return require_matching_vector_type (argno, first_argno, first_type); + } + + /* Use FIRST_TYPE to get the expected type class and element size. */ +@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass, + so we can use normal left-to-right resolution. 
*/ + if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !require_vector_type (1, VECTOR_TYPE_svbool_t) +- || !require_matching_vector_type (2, type)) ++ || !require_matching_vector_type (2, 0, type)) + return error_mark_node; + } + else +@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + +- i += 1; ++ unsigned int first_arg = i++; + for (; i < nargs - nimm; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + for (; i < nargs; ++i) +@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops) + + unsigned int first_arg = i++; + for (; i < nargs - 1; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + return finish_opt_n_resolution (i, first_arg, type); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 5a4f35123..f7d6cc084 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -476,7 +476,8 @@ public: + bool require_vector_or_scalar_type (unsigned int); + + bool require_vector_type (unsigned int, vector_type_index); +- bool require_matching_vector_type (unsigned int, type_suffix_index); ++ bool require_matching_vector_type (unsigned int, unsigned int, ++ type_suffix_index); + bool require_derived_vector_type (unsigned int, unsigned int, + type_suffix_index, + type_class_index = SAME_TYPE_CLASS, +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +index 4343146de..2e919d287 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +@@ -7,8 +7,8 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16) + { + svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ + svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ +- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ +- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ +- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ ++ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */ ++ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ ++ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ + svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +index 10b6b7e81..81533b25d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
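The effect of the new wording, shown as a sketch matching the binary_1.c expectations above:

#include <arm_sve.h>

void
f (svuint8_t u8, svint16_t s16)
{
  /* Old: "... but previous arguments had type 'svuint8_t'"
     New: "... but argument 1 had type 'svuint8_t'"  */
  svzip1 (u8, s16); /* passing 'svint16_t' to argument 2 of 'svzip1',
		       but argument 1 had type 'svuint8_t' */
}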
_service:tar_scm:0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch
Added
@@ -0,0 +1,67 @@
+From be1397b598a436d562e6a35a13ed2ae695531255 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 22 May 2024 09:29:43 +0800
+Subject: [PATCH 167/188] LoongArch: Guard REGNO with REG_P in
+ loongarch_expand_conditional_move [PR115169]
+
+gcc/ChangeLog:
+
+	PR target/115169
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_conditional_move): Guard REGNO with REG_P.
+---
+ gcc/config/loongarch/loongarch.cc | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c86a0856b..0c2c38f6f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5341,6 +5341,7 @@ loongarch_expand_conditional_move (rtx *operands)
+   rtx op1_extend = op1;
+ 
+   /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
++  bool promote_op[2] = {false, false};
+   bool promote_p = false;
+   machine_mode mode = GET_MODE (operands[0]);
+ 
+@@ -5348,9 +5349,15 @@ loongarch_expand_conditional_move (rtx *operands)
+     loongarch_emit_float_compare (&code, &op0, &op1);
+   else
+     {
+-      if ((REGNO (op0) == REGNO (operands[2])
+-	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+-	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
++      if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++	{
++	  promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
++			   REGNO (op0) == REGNO (operands[2]));
++	  promote_op[1] = (REG_P (op1) && REG_P (operands[3]) &&
++			   REGNO (op1) == REGNO (operands[3]));
++	}
++
++      if (promote_op[0] || promote_op[1])
+ 	{
+ 	  mode = word_mode;
+ 	  promote_p = true;
+@@ -5392,7 +5399,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ 
+   if (promote_p)
+     {
+-      if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
++      if (promote_op[0])
+ 	op2 = op0_extend;
+       else
+ 	{
+@@ -5400,7 +5407,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ 	  op2 = force_reg (mode, op2);
+ 	}
+ 
+-      if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
++      if (promote_op[1])
+ 	op3 = op1_extend;
+       else
+ 	{
+--
+2.43.0
+
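A standalone analogue of the bug class (not GCC code; all names here are invented): an accessor that is only valid for one node kind must be guarded by a kind check, exactly as REG_P now guards REGNO above.

#include <assert.h>
#include <stdio.h>

enum rtx_kind { REG, CONST_INT, MEM };

struct rtx_like
{
  enum rtx_kind kind;
  int payload;			/* register number for REG nodes */
};

static int
regno (const struct rtx_like *x)
{
  assert (x->kind == REG);	/* mirrors the RTL-checking abort */
  return x->payload;
}

int
main (void)
{
  struct rtx_like r = { REG, 4 }, c = { CONST_INT, 2 };

  /* Guarded comparison, the pattern the patch introduces; regno () is
     never reached for the non-REG operand.  */
  int same = (r.kind == REG && c.kind == REG
	      && regno (&r) == regno (&c));
  printf ("%d\n", same);	/* prints 0, with no assertion failure */
  return 0;
}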
_service:tar_scm:0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
Added
@@ -0,0 +1,368 @@ +From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:21 +0000 +Subject: PATCH 069/157 BackportSME aarch64: Make more use of sve_type in + ACLE code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd + +This patch makes some functions operate on sve_type, rather than just +on type suffixes. It also allows an overload to be resolved based on +a mode and sve_type. In this case the sve_type is used to derive the +group size as well as a type suffix. + +This is needed for the SME2 intrinsics and the new tuple forms of +svreinterpret. No functional change intended on its own. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::lookup_form): Add an overload that takes + an sve_type rather than type and group suffixes. + (function_resolver::resolve_to): Likewise. + (function_resolver::infer_vector_or_tuple_type): Return an sve_type. + (function_resolver::infer_tuple_type): Likewise. + (function_resolver::require_matching_vector_type): Take an sve_type + rather than a type_suffix_index. + (function_resolver::require_derived_vector_type): Likewise. + * config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group): + New function. + (function_resolver::lookup_form): Add an overload that takes + an sve_type rather than type and group suffixes. + (function_resolver::resolve_to): Likewise. + (function_resolver::infer_vector_or_tuple_type): Return an sve_type. + (function_resolver::infer_tuple_type): Likewise. + (function_resolver::infer_vector_type): Update accordingly. + (function_resolver::require_matching_vector_type): Take an sve_type + rather than a type_suffix_index. + (function_resolver::require_derived_vector_type): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve) + (set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update + calls accordingly. 
+--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 16 +-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++----- + gcc/config/aarch64/aarch64-sve-builtins.h | 12 +- + 3 files changed, 95 insertions(+), 44 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 40aa418e0..f187b4cb2 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1)) + return error_mark_node; + +@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (3, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1) + || !r.require_derived_vector_type (i + 2, i, type)) + return error_mark_node; +@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0> + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) + || !r.require_pointer_type (i) + || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) +- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) ++ || !(type = r.infer_tuple_type (nargs - 1))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); +@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 1545fd78d..e98274f8a 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type) + return NUM_TYPE_SUFFIXES; + } + ++/* Return the implicit group suffix for intrinsics that operate on NVECTORS ++ vectors. */ ++static group_suffix_index ++num_vectors_to_group (unsigned int nvectors) ++{ ++ switch (nvectors) ++ { ++ case 1: return GROUP_none; ++ case 2: return GROUP_x2; ++ case 3: return GROUP_x3; ++ case 4: return GROUP_x4; ++ } ++ gcc_unreachable (); ++} ++ + /* Return the vector type associated with TYPE. */ + static tree + get_vector_type (sve_type type) +@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode, + return rfn ? rfn->decl : NULL_TREE; + } + ++/* Silently check whether there is an instance of the function that has the ++ mode suffix given by MODE and the type and group suffixes implied by TYPE. ++ If the overloaded function has an explicit first type suffix (like ++ conversions do), TYPE describes the implicit second type suffix. 
++ Otherwise, TYPE describes the only type suffix. ++ ++ Return the decl of the function if it exists, otherwise return null. */ ++tree ++function_resolver::lookup_form (mode_suffix_index mode, sve_type type) ++{ ++ type_suffix_index type0 = type_suffix_ids0; ++ type_suffix_index type1 = type_suffix_ids1; ++ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type; ++ ++ group_suffix_index group = group_suffix_id; ++ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ()) ++ group = num_vectors_to_group (type.num_vectors); ++ ++ return lookup_form (mode, type0, type1, group); ++} ++ + /* Resolve the function to one with the mode suffix given by MODE, the + type suffixes given by TYPE0 and TYPE1, and group suffix given by + GROUP. Return its function decl on success, otherwise report an +@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode, + return res; + } + ++/* Resolve the function to one that has the suffixes associated with MODE ++ and TYPE; see lookup_form for how TYPE is interpreted. Return the ++ function decl on success, otherwise report an error and return ++ error_mark_node. */ ++tree ++function_resolver::resolve_to (mode_suffix_index mode, sve_type type) ++{ ++ if (tree res = lookup_form (mode, type)) ++ return res; ++ ++ return report_no_such_form (type); ++} ++ + /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. + Return the associated type suffix on success, otherwise report an + error and return NUM_TYPE_SUFFIXES. */ +@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno) + + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type +- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +- and return NUM_TYPE_SUFFIXES on failure. */ +-type_suffix_index ++ on success. Report an error on failure. */ ++sve_type + function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) + { + auto type = infer_sve_type (argno); + if (!type) +- return NUM_TYPE_SUFFIXES; ++ return type; + + if (type.num_vectors == num_vectors) +- return type.type; ++ return type; + + report_incorrect_num_vectors (argno, type, num_vectors); +- return NUM_TYPE_SUFFIXES; ++ return {}; + }
_service:tar_scm:0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch
Added
@@ -0,0 +1,36 @@
+From 7675f45536691eeca7d8163020c9bfb127d5ee4f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 12 Jun 2024 11:01:53 +0800
+Subject: [PATCH 168/188] LoongArch: Fix mode size comparison in
+ loongarch_expand_conditional_move
+
+We were comparing a mode size with word_mode, but word_mode is an enum
+value, thus this does not really make any sense.  (Un)luckily E_DImode
+happens to be 8 so this seemed to work, but let's make it correct so it
+won't blow up when we add LA32 support or add another machine mode...
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_conditional_move): Compare mode size with
+	UNITS_PER_WORD instead of word_mode.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 0c2c38f6f..77f83ab9e 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5349,7 +5349,7 @@ loongarch_expand_conditional_move (rtx *operands)
+     loongarch_emit_float_compare (&code, &op0, &op1);
+   else
+     {
+-      if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++      if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
+ 	{
+ 	  promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
+--
+2.43.0
+
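A standalone illustration (not GCC code; the *_like names are invented) of why the old comparison only worked by accident: word_mode is an enumerator, and per the commit message E_DImode happens to have the integer value 8, which is also the 64-bit word size in bytes.

#include <stdio.h>

enum mode_like { E_SImode_like = 7, E_DImode_like = 8 };
#define UNITS_PER_WORD_LIKE 8	/* bytes per word: the correct yardstick */

int
main (void)
{
  int size_in_bytes = 4;	/* e.g. a 32-bit operand */

  /* Accidentally right: true only because the enumerator's value is 8.  */
  printf ("%d\n", size_in_bytes < E_DImode_like);

  /* Deliberately right: bytes compared with bytes.  */
  printf ("%d\n", size_in_bytes < UNITS_PER_WORD_LIKE);
  return 0;
}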
_service:tar_scm:0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
Added
@@ -0,0 +1,106 @@ +From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:22 +0000 +Subject: PATCH 070/157 BackportSME aarch64: Tweak error message for + (tuple,vector) pairs + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958 + +SME2 adds more intrinsics that take a tuple of vectors followed +by a single vector, with the two arguments expected to have the +same element type. Unlike with the existing svset* intrinsics, +the size of the tuple is not fixed by the overloaded function name. + +This patch adds an error message that (hopefully) copes better +with that combination. + +gcc/ + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::require_derived_vector_type): Add a specific + error message for the case in which the caller wants a single + vector whose element type matches a previous tuyple argument. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected + error message. + * gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++ + .../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++-- + 4 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index e98274f8a..9224916a7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno, + if (!actual_type) + return false; + ++ if (orig_expected_tclass == SAME_TYPE_CLASS ++ && orig_expected_bits == SAME_SIZE) ++ { ++ if (actual_type.type == first_type.type) ++ return true; ++ ++ error_at (location, "passing %qT to argument %d of %qE, but" ++ " argument %d was a tuple of %qT", ++ get_vector_type (actual_type), argno + 1, fndecl, ++ first_argno + 1, get_vector_type (first_type.type)); ++ return false; ++ } ++ + /* Exit now if we got the right type. 
*/ + auto &actual_type_suffix = type_suffixesactual_type.type; + bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +index f07c76102..f2a6da536 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) + u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ + u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ + u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */ +- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ +- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ ++ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ ++ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ + u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ + u8x2 = svset2 (u8x2, 0, u8); + f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +index 543a1bea8..92b955f83 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, + f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ + f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ + f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */ +- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ +- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ ++ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ ++ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ + f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ + f16x3 = svset3 (f16x3, 0, f16); + f64 = svset3 (f16x3, 0, 
f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +index be911a731..f0696fb07 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) + s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ +- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ +- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ ++ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ ++ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ + s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ + s32x4 = svset4 (s32x4, 0, s32); + f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ +-- +2.33.0 +
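A sketch of the new message in context, matching the set_1.c expectations above:

#include <arm_sve.h>

void
f (svuint8x2_t u8x2, svfloat64_t f64)
{
  svset2 (u8x2, 0, f64); /* passing 'svfloat64_t' to argument 3 of
			    'svset2', but argument 1 was a tuple of
			    'svuint8_t' */
}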
_service:tar_scm:0169-LoongArch-Use-bstrins-for-value-1u-const.patch
Added
@@ -0,0 +1,135 @@
+From 7e34bede110bfa7b2f91dc657c41ed0e7b4b11f7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 9 Jun 2024 14:43:48 +0800
+Subject: [PATCH 169/188] LoongArch: Use bstrins for "value & (-1u << const)"
+
+A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and
+pair, and twice fast as a srli/slli pair.  When the src reg and the dst
+reg happens to be the same, the move instruction can be optimized away.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md (high_bitmask_operand): New
+	predicate.
+	* config/loongarch/constraints.md (Yy): New constraint.
+	* config/loongarch/loongarch.md (and<mode>3_align): New
+	define_insn_and_split.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/bstrins-1.c: New test.
+	* gcc.target/loongarch/bstrins-2.c: New test.
+---
+ gcc/config/loongarch/constraints.md            |  5 +++++
+ gcc/config/loongarch/loongarch.md              | 17 +++++++++++++++++
+ gcc/config/loongarch/predicates.md             |  4 ++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-1.c |  9 +++++++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++
+ 5 files changed, 49 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+
+diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
+index cec5d8857..f3f5776da 100644
+--- a/gcc/config/loongarch/constraints.md
++++ b/gcc/config/loongarch/constraints.md
+@@ -94,6 +94,7 @@
+ ;;    "A constant @code{move_operand} that can be safely loaded using
+ ;;     @code{la}."
+ ;; "Yx"
++;; "Yy"
+ ;; "Z" -
+ ;;    "ZC"
+ ;;      "A memory operand whose address is formed by a base register and offset
+@@ -291,6 +292,10 @@
+   "@internal"
+   (match_operand 0 "low_bitmask_operand"))
+ 
++(define_constraint "Yy"
++  "@internal"
++  (match_operand 0 "high_bitmask_operand"))
++
+ (define_constraint "YI"
+   "@internal
+    A replicated vector const in which the replicated value is in the range
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 20494ce8a..55a759850 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1542,6 +1542,23 @@
+   [(set_attr "move_type" "pick_ins")
+    (set_attr "mode" "<MODE>")])
+ 
++(define_insn_and_split "and<mode>3_align"
++  [(set (match_operand:GPR 0 "register_operand" "=r")
++	(and:GPR (match_operand:GPR 1 "register_operand" "r")
++		 (match_operand:GPR 2 "high_bitmask_operand" "Yy")))]
++  ""
++  "#"
++  ""
++  [(set (match_dup 0) (match_dup 1))
++   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0))
++	(const_int 0))]
++{
++  int len;
++
++  len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2]));
++  operands[2] = GEN_INT (len);
++})
++
+ (define_insn_and_split "*bstrins_<mode>_for_mask"
+   [(set (match_operand:GPR 0 "register_operand" "=r")
+ 	(and:GPR (match_operand:GPR 1 "register_operand" "r")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 1d9a30695..95be8a4fe 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -293,6 +293,10 @@
+   (and (match_code "const_int")
+        (match_test "low_bitmask_len (mode, INTVAL (op)) > 12")))
+ 
++(define_predicate "high_bitmask_operand"
++  (and (match_code "const_int")
++       (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0")))
++
+ (define_predicate "d_operand"
+   (and (match_code "reg")
+        (match_test "GP_REG_P (REGNO (op))")))
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+new file mode 100644
+index 000000000..7cb3a9523
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */
++
++long
++x (long a)
++{
++  return a & -32;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+new file mode 100644
+index 000000000..9777f502e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]\+,\\\$r0,4,0" } } */
++
++struct aligned_buffer {
++  _Alignas(32) char x[1024];
++};
++
++extern int f(char *);
++int g(void)
++{
++  struct aligned_buffer buf;
++  return f(buf.x);
++}
+--
+2.43.0
+
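A standalone check (hypothetical example, not part of the patch) of the identity the new pattern relies on: ANDing with a high bitmask -1 << k is the same as inserting k zero bits at the bottom, which "bstrins.d rd,$r0,k-1,0" performs in place after a (possibly eliminated) move.

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t a = 0x123456789abcdef0;
  int k = 5;					/* -32 == -1 << 5 */

  int64_t by_and = a & (int64_t) -32;		/* the source-level form */
  int64_t by_insert = a & ~(((int64_t) 1 << k) - 1); /* zero bits [k-1:0] */

  printf ("%d\n", by_and == by_insert);		/* prints 1 */
  return 0;
}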
_service:tar_scm:0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
Added
@@ -0,0 +1,1236 @@ +From 95234ef07c47dda7ac6a13f75619580a6683118c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:22 +0000 +Subject: PATCH 071/157 BackportSME aarch64: Add tuple forms of + svreinterpret + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ce9dc263c2f6d455b2013fc58932beda2a4ae92 + +SME2 adds a number of intrinsics that operate on tuples of 2 and 4 +vectors. The ACLE therefore extends the existing svreinterpret +intrinsics to handle tuples as well. + +gcc/ + * config/aarch64/aarch64-sve-builtins-base.cc + (svreinterpret_impl::fold): Punt on tuple forms. + (svreinterpret_impl::expand): Use tuple_mode instead of vector_mode. + * config/aarch64/aarch64-sve-builtins-base.def (svreinterpret): + Extend to x1234 groups. + * config/aarch64/aarch64-sve-builtins-functions.h + (multi_vector_function::vectors_per_tuple): If the function has + a group suffix, get the number of vectors from there. + * config/aarch64/aarch64-sve-builtins-shapes.h (reinterpret): Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (reinterpret_def) + (reinterpret): New function shape. + * config/aarch64/aarch64-sve-builtins.cc (function_groups): Handle + DEF_SVE_FUNCTION_GS. + * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_FUNCTION_GS): New + macro. + (DEF_SVE_FUNCTION): Forward to DEF_SVE_FUNCTION_GS by default. + * config/aarch64/aarch64-sve-builtins.h + (function_instance::tuple_mode): New member function. + (function_base::vectors_per_tuple): Take the function instance + as argument and get the number from the group suffix. + (function_instance::vectors_per_tuple): Update accordingly. + * config/aarch64/iterators.md (SVE_FULLx2, SVE_FULLx3, SVE_FULLx4) + (SVE_ALL_STRUCT): New mode iterators. + (SVE_STRUCT): Redefine in terms of SVE_FULL*. + * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret<mode>) + (*aarch64_sve_reinterpret<mode>): Extend to SVE structure modes. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_XN): + New macro. + * gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c: Add tests for + tuple forms. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c: Likewise. 
+--- + .../aarch64/aarch64-sve-builtins-base.cc | 5 +- + .../aarch64/aarch64-sve-builtins-base.def | 2 +- + .../aarch64/aarch64-sve-builtins-functions.h | 7 ++- + .../aarch64/aarch64-sve-builtins-shapes.cc | 28 +++++++++ + .../aarch64/aarch64-sve-builtins-shapes.h | 1 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.def | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.h | 20 +++++- + gcc/config/aarch64/aarch64-sve.md | 8 +-- + gcc/config/aarch64/iterators.md | 26 +++++--- + .../aarch64/sve/acle/asm/reinterpret_bf16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +++++ + 23 files changed, 851 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c9bf13792..53f3f28f9 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1928,6 +1928,9 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { ++ if (f.vectors_per_tuple () > 1) ++ return NULL; ++ + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. 
*/ + if (!targetm.can_change_mode_class (f.vector_mode (0), +@@ -1944,7 +1947,7 @@ public: + rtx + expand (function_expander &e) const OVERRIDE + { +- machine_mode mode = e.vector_mode (0); ++ machine_mode mode = e.tuple_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index 3a58f76c3..756469959 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -248,7 +248,7 @@ DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) + DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) + DEF_SVE_FUNCTION (svrecps, binary, all_float, none) + DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) +-DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) ++DEF_SVE_FUNCTION_GS (svreinterpret, reinterpret, reinterpret, x1234, none) + DEF_SVE_FUNCTION (svrev, unary, all_data, none) + DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) + DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +index 9d346b6ff..94a6d1207 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +@@ -59,8 +59,13 @@ public: + : m_vectors_per_tuple (vectors_per_tuple) {} + + unsigned int +- vectors_per_tuple () const OVERRIDE ++ vectors_per_tuple (const function_instance &fi) const override + { ++ if (fi.group_suffix_id != GROUP_none) ++ { ++ gcc_checking_assert (m_vectors_per_tuple == 1); ++ return fi.group_suffix ().vectors_per_tuple; ++ } + return m_vectors_per_tuple; + } + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index f187b4cb2..95e40d8f3 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -2400,6 +2400,34 @@ struct reduction_wide_def : public overloaded_base<0> + }; + SHAPE (reduction_wide) + ++/* sv<t0>x<g>_t svfoo_t0_t1_g(sv<t1>x<g>_t) ++ ++ where the target type <t0> must be specified explicitly but the source ++ type <t1> can be inferred. 
*/ ++struct reinterpret_def : public overloaded_base<1> ++{ ++ bool explicit_group_suffix_p () const override { return false; } ++ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t0,t1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const override ++ { ++ sve_type type; ++ if (!r.check_num_arguments (1) ++ || !(type = r.infer_sve_type (0))) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (reinterpret) ++ + /* sv<t0>xN_t svfoo_t0(sv<t0>xN_t, uint64_t, sv<t0>_t) + + where the second argument is an integer constant expression in the +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +index 3b0025f85..2b06152d4 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +@@ -133,6 +133,7 @@ namespace aarch64_sve + extern const function_shape *const rdffr; + extern const function_shape *const reduction; + extern const function_shape *const reduction_wide; ++ extern const function_shape *const reinterpret; + extern const function_shape *const set; + extern const function_shape *const setffr; + extern const function_shape *const shift_left_imm_long; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 9224916a7..c439f2e8a 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -494,6 +494,10 @@ static const group_suffix_index groups_none = { + GROUP_none, NUM_GROUP_SUFFIXES + }; + ++static const group_suffix_index groups_x1234 = { ++ GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES ++};
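A usage sketch of the new tuple forms (the intrinsic names follow the ACLE pattern exercised by the new reinterpret_*.c tests; treat the explicit name as an assumption of that pattern): a whole x2 tuple is reinterpreted in one call instead of vector by vector.

#include <arm_sve.h>

svuint8x2_t
f (svint32x2_t x)
{
  return svreinterpret_u8 (x);	/* overloaded form; the explicit name
				   would be svreinterpret_u8_s32_x2 */
}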
_service:tar_scm:0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch
Added
@@ -0,0 +1,158 @@
+From bdc189d43ef38ea53823120de8008f39ead0618d Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 15 Jun 2024 18:29:43 +0800
+Subject: [PATCH 170/188] LoongArch: Tweak IOR rtx_cost for bstrins
+
+Consider
+
+    c &= 0xfff;
+    a &= ~0xfff;
+    b &= ~0xfff;
+    a |= c;
+    b |= c;
+
+This can be done with 2 bstrins instructions.  But we need to recognize
+it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff"
+forward.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc:
+	(loongarch_use_bstrins_for_ior_with_mask): Split the main logic
+	into ...
+	(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
+	(loongarch_rtx_costs): Special case for IOR those can be
+	implemented with bstrins.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/bstrins-3.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc     | 73 ++++++++++++++-----
+ .../gcc.target/loongarch/bstrins-3.c  | 16 ++++
+ 2 files changed, 72 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 77f83ab9e..cd9fa98dc 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -3678,6 +3678,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
+   return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
+ }
+ 
++static int
++loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
++					   unsigned HOST_WIDE_INT mask1,
++					   unsigned HOST_WIDE_INT mask2)
++{
++  if (mask1 != ~mask2 || !mask1 || !mask2)
++    return 0;
++
++  /* Try to avoid a right-shift.  */
++  if (low_bitmask_len (mode, mask1) != -1)
++    return -1;
++
++  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
++    return 1;
++
++  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
++    return -1;
++
++  return 0;
++}
++
+ /* Return the cost of moving between two registers of mode MODE.  */
+ 
+ static int
+@@ -3809,6 +3830,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+       /* Fall through.  */
+ 
+     case IOR:
++      {
++	rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
++	if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
++	    && (mode == SImode || (TARGET_64BIT && mode == DImode)))
++	  {
++	    rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1);
++	    if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
++	      {
++		unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
++		unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
++		if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
++							       mask0,
++							       mask1))
++		  {
++		    /* A bstrins instruction */
++		    *total = COSTS_N_INSNS (1);
++
++		    /* A srai instruction */
++		    if (low_bitmask_len (mode, mask0) == -1
++			&& low_bitmask_len (mode, mask1) == -1)
++		      *total += COSTS_N_INSNS (1);
++
++		    for (int i = 0; i < 2; i++)
++		      *total += set_src_cost (XEXP (op[i], 0), mode, speed);
++
++		    return true;
++		  }
++	      }
++	  }
++      }
++
++      /* Fall through.  */
+     case XOR:
+       /* Double-word operations use two single-word operations.  */
+       *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+@@ -5793,23 +5846,9 @@ bool loongarch_pre_reload_split (void)
+ int
+ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+ {
+-  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
+-  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
+-
+-  if (mask1 != ~mask2 || !mask1 || !mask2)
+-    return 0;
+-
+-  /* Try to avoid a right-shift.  */
+-  if (low_bitmask_len (mode, mask1) != -1)
+-    return -1;
+-
+-  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+-    return 1;
+-
+-  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+-    return -1;
+-
+-  return 0;
++  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
++						    UINTVAL (op[2]),
++						    UINTVAL (op[4]));
+ }
+ 
+ /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+new file mode 100644
+index 000000000..13762bdef
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-rtl-final" } */
++/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
++
++struct X {
++  long a, b;
++};
++
++struct X
++test (long a, long b, long c)
++{
++  c &= 0xfff;
++  a &= ~0xfff;
++  b &= ~0xfff;
++  return (struct X){.a = a | c, .b = b | c};
++}
+--
+2.43.0
+
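A standalone version of the example from the commit message (hypothetical function name; with the cost tweak, both masked ORs should become bstrins instructions, which is what the two-insv scan in bstrins-3.c checks):

long
merge_low_bits (long a, long b, long c, long *out_b)
{
  c &= 0xfff;
  a &= ~0xfff;
  b &= ~0xfff;
  *out_b = b | c;	/* bstrins.d: insert c[11:0] into b */
  return a | c;		/* bstrins.d: insert c[11:0] into a */
}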
View file
_service:tar_scm:0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
Added
@@ -0,0 +1,90 @@ +From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 27 Nov 2023 13:38:16 +0000 +Subject: PATCH 072/157 BackportSME attribs: Use existing traits for + excl_hash_traits + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31 + +excl_hash_traits can be defined more simply by reusing existing traits. + +gcc/ + * attribs.cc (excl_hash_traits): Delete. + (test_attribute_exclusions): Use pair_hash and nofree_string_hash + instead. +--- + gcc/attribs.cc | 45 +++------------------------------------------ + 1 file changed, 3 insertions(+), 42 deletions(-) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index b219f8780..16d05b1da 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -2555,47 +2555,6 @@ namespace selftest + + typedef std::pair<const char *, const char *> excl_pair; + +-struct excl_hash_traits: typed_noop_remove<excl_pair> +-{ +- typedef excl_pair value_type; +- typedef value_type compare_type; +- +- static hashval_t hash (const value_type &x) +- { +- hashval_t h1 = htab_hash_string (x.first); +- hashval_t h2 = htab_hash_string (x.second); +- return h1 ^ h2; +- } +- +- static bool equal (const value_type &x, const value_type &y) +- { +- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second); +- } +- +- static void mark_deleted (value_type &x) +- { +- x = value_type (NULL, NULL); +- } +- +- static const bool empty_zero_p = false; +- +- static void mark_empty (value_type &x) +- { +- x = value_type ("", ""); +- } +- +- static bool is_deleted (const value_type &x) +- { +- return !x.first && !x.second; +- } +- +- static bool is_empty (const value_type &x) +- { +- return !*x.first && !*x.second; +- } +-}; +- +- + /* Self-test to verify that each attribute exclusion is symmetric, + meaning that if attribute A is encoded as incompatible with + attribute B then the opposite relationship is also encoded. +@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove<excl_pair> + static void + test_attribute_exclusions () + { ++ using excl_hash_traits = pair_hash<nofree_string_hash, nofree_string_hash>; ++ + /* Iterate over the array of attribute tables first (with TI0 as + the index) and over the array of attribute_spec in each table + (with SI0 as the index). */ + const size_t ntables = ARRAY_SIZE (attribute_tables); + + /* Set of pairs of mutually exclusive attributes. */ +- typedef hash_set<excl_pair, false, excl_hash_traits> exclusion_set; ++ typedef hash_set<excl_hash_traits> exclusion_set; + exclusion_set excl_set; + + for (size_t ti0 = 0; ti0 != ntables; ++ti0) +-- +2.33.0 +
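For readers unfamiliar with GCC's hash-table traits, the point of this simplification is that a pair-of-strings hash can be composed from existing building blocks instead of hand-writing hash, equality, and sentinel logic. A rough standalone analogy using only the C++ standard library (deliberately not GCC's hash_set/pair_hash API, just the same composition idea):

// Standalone analogy: compose a pair hasher from existing string hashing,
// as the patch composes pair_hash from two nofree_string_hash traits.
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_set>
#include <utility>

struct pair_cstr_hash
{
  size_t operator() (const std::pair<const char *, const char *> &p) const
  {
    size_t h1 = std::hash<std::string_view>{} (p.first);
    size_t h2 = std::hash<std::string_view>{} (p.second);
    return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2));  // combine
  }
};

struct pair_cstr_eq
{
  bool operator() (const std::pair<const char *, const char *> &a,
                   const std::pair<const char *, const char *> &b) const
  {
    return !strcmp (a.first, b.first) && !strcmp (a.second, b.second);
  }
};

using exclusion_set
  = std::unordered_set<std::pair<const char *, const char *>,
                       pair_cstr_hash, pair_cstr_eq>;

// Mirrors what test_attribute_exclusions verifies: every (A, B) pair
// must have a matching (B, A) pair.
bool
exclusions_are_symmetric (const exclusion_set &s)
{
  for (const auto &p : s)
    if (!s.count ({p.second, p.first}))
      return false;
  return true;
}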
View file
_service:tar_scm:0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch
Added
@@ -0,0 +1,44 @@
+From 51c20768fde58093794ff0281c698b6738346313 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 16 Jun 2024 12:22:40 +0800
+Subject: [PATCH 171/188] LoongArch: NFC: Dedup and sort the comment in
+ loongarch_print_operand_reloc
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+	Dedup and sort the comment describing modifiers.
+---
+ gcc/config/loongarch/loongarch.cc | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index cd9fa98dc..35524b5da 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6129,21 +6129,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+    'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+        'z' for (eq:?I ...), 'n' for (ne:?I ...).
+    't'  Like 'T', but with the EQ/NE cases reversed
+-   'F'  Print the FPU branch condition for comparison OP.
+-   'W'  Print the inverse of the FPU branch condition for comparison OP.
+-   'w'  Print a LSX register.
+    'u'  Print a LASX register.
+-   'T'  Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+-       'z' for (eq:?I ...), 'n' for (ne:?I ...).
+-   't'  Like 'T', but with the EQ/NE cases reversed
+-   'Y'  Print loongarch_fp_conditions[INTVAL (OP)]
+-   'Z'  Print OP and a comma for 8CC, otherwise print nothing.
+-   'z'  Print $0 if OP is zero, otherwise print OP normally.
+    'v'  Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
+        V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
+    'V'  Print exact log2 of CONST_INT OP element 0 of a replicated
+        CONST_VECTOR in decimal.
+    'W'  Print the inverse of the FPU branch condition for comparison OP.
++   'w'  Print a LSX register.
+    'X'  Print CONST_INT OP in hexadecimal format.
+    'x'  Print the low 16 bits of CONST_INT OP in hexadecimal format.
+    'Y'  Print loongarch_fp_conditions[INTVAL (OP)]
-- 
2.43.0
+
View file
_service:tar_scm:0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
Added
@@ -0,0 +1,2369 @@ +From 82d654912e3671055034e789a8f7110f6d87d447 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 2 Dec 2023 13:49:52 +0000 +Subject: PATCH 073/157 BackportSME Allow target attributes in non-gnu + namespaces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fa24687aa3a683fd105ce5ff6b176f48dca3b6c + +Currently there are four static sources of attributes: + +- LANG_HOOKS_ATTRIBUTE_TABLE +- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE +- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE +- TARGET_ATTRIBUTE_TABLE + +All of the attributes in these tables go in the "gnu" namespace. +This means that they can use the traditional GNU __attribute__((...)) +syntax and the standard gnu::... syntax. + +Standard attributes are registered dynamically with a null namespace. +There are no supported attributes in other namespaces (clang, vendor +namespaces, etc.). + +This patch tries to generalise things by making the namespace +part of the attribute specification. + +It's usual for multiple attributes to be defined in the same namespace, +so rather than adding the namespace to each individual definition, +it seemed better to group attributes in the same namespace together. +This would also allow us to reuse the same table for clang attributes +that are written with the GNU syntax, or other similar situations +where the attribute can be accessed via multiple "spellings". + +The patch therefore adds a scoped_attribute_specs that contains +a namespace and a list of attributes in that namespace. + +It's still possible to have multiple scoped_attribute_specs +for the same namespace. E.g. it makes sense to keep the +C++-specific, C/C++-common, and format-related attributes in +separate tables, even though they're all GNU attributes. + +Current lists of attributes are terminated by a null name. +Rather than keep that for the new structure, it seemed neater +to use an array_slice. This also makes the tables slighly more +compact. + +In general, a target might want to support attributes in multiple +namespaces. Rather than have a separate hook for each possibility +(like the three langhooks above), it seemed better to make +TARGET_ATTRIBUTE_TABLE a table of tables. Specifically, it's +an array_slice of scoped_attribute_specs. + +We can do the same thing for langhooks, which allows the three hooks +above to be merged into a single LANG_HOOKS_ATTRIBUTE_TABLE. +It also allows the standard attributes to be registered statically +and checked by the usual attribs.cc checks. + +The patch adds a TARGET_GNU_ATTRIBUTES helper for the common case +in which a target wants a single table of gnu attributes. It can +only be used if the table is free of preprocessor directives. + +There are probably other things we need to do to make vendor namespaces +work smoothly. E.g. in principle it would be good to make exclusion +sets namespace-aware. But to some extent we have that with standard +vs. gnu attributes too. This patch is just supposed to be a first step. + +gcc/ + * attribs.h (scoped_attribute_specs): New structure. + (register_scoped_attributes): Take a reference to a + scoped_attribute_specs instead of separate namespace and array + parameters. + * plugin.h (register_scoped_attributes): Likewise. + * attribs.cc (register_scoped_attributes): Likewise. + (attribute_tables): Change into an array of scoped_attribute_specs + pointers. Reduce to 1 element for frontends and 1 element for targets. + (empty_attribute_table): Delete. 
+ (check_attribute_tables): Update for changes to attribute_tables. + Use a hash_set to identify duplicates. + (handle_ignored_attributes_option): Update for above changes. + (init_attributes): Likewise. + (excl_pair): Delete. + (test_attribute_exclusions): Update for above changes. Don't + enforce symmetry for standard attributes in the top-level namespace. + * langhooks-def.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Likewise. + (LANG_HOOKS_INITIALIZER): Update accordingly. + (LANG_HOOKS_ATTRIBUTE_TABLE): Define to an empty constructor. + * langhooks.h (lang_hooks::common_attribute_table): Delete. + (lang_hooks::format_attribute_table): Likewise. + (lang_hooks::attribute_table): Redefine to an array of + scoped_attribute_specs pointers. + * target-def.h (TARGET_GNU_ATTRIBUTES): New macro. + * target.def (attribute_spec): Redefine to return an array of + scoped_attribute_specs pointers. + * tree-inline.cc (function_attribute_inlinable_p): Update accordingly. + * doc/tm.texi: Regenerate. + * config/aarch64/aarch64.cc (aarch64_attribute_table): Define using + TARGET_GNU_ATTRIBUTES. + * config/alpha/alpha.cc (vms_attribute_table): Likewise. + * config/avr/avr.cc (avr_attribute_table): Likewise. + * config/bfin/bfin.cc (bfin_attribute_table): Likewise. + * config/bpf/bpf.cc (bpf_attribute_table): Likewise. + * config/csky/csky.cc (csky_attribute_table): Likewise. + * config/epiphany/epiphany.cc (epiphany_attribute_table): Likewise. + * config/gcn/gcn.cc (gcn_attribute_table): Likewise. + * config/h8300/h8300.cc (h8300_attribute_table): Likewise. + * config/loongarch/loongarch.cc (loongarch_attribute_table): Likewise. + * config/m32c/m32c.cc (m32c_attribute_table): Likewise. + * config/m32r/m32r.cc (m32r_attribute_table): Likewise. + * config/m68k/m68k.cc (m68k_attribute_table): Likewise. + * config/mcore/mcore.cc (mcore_attribute_table): Likewise. + * config/microblaze/microblaze.cc (microblaze_attribute_table): + Likewise. + * config/mips/mips.cc (mips_attribute_table): Likewise. + * config/msp430/msp430.cc (msp430_attribute_table): Likewise. + * config/nds32/nds32.cc (nds32_attribute_table): Likewise. + * config/nvptx/nvptx.cc (nvptx_attribute_table): Likewise. + * config/riscv/riscv.cc (riscv_attribute_table): Likewise. + * config/rl78/rl78.cc (rl78_attribute_table): Likewise. + * config/rx/rx.cc (rx_attribute_table): Likewise. + * config/s390/s390.cc (s390_attribute_table): Likewise. + * config/sh/sh.cc (sh_attribute_table): Likewise. + * config/sparc/sparc.cc (sparc_attribute_table): Likewise. + * config/stormy16/stormy16.cc (xstormy16_attribute_table): Likewise. + * config/v850/v850.cc (v850_attribute_table): Likewise. + * config/visium/visium.cc (visium_attribute_table): Likewise. + * config/arc/arc.cc (arc_attribute_table): Likewise. Move further + down file. + * config/arm/arm.cc (arm_attribute_table): Update for above changes, + using... + (arm_gnu_attributes, arm_gnu_attribute_table): ...these new globals. + * config/i386/i386-options.h (ix86_attribute_table): Delete. + (ix86_gnu_attribute_table): Declare. + * config/i386/i386-options.cc (ix86_attribute_table): Replace with... + (ix86_gnu_attributes, ix86_gnu_attribute_table): ...these two globals. + * config/i386/i386.cc (ix86_attribute_table): Define as an array of + scoped_attribute_specs pointers. + * config/ia64/ia64.cc (ia64_attribute_table): Update for above changes, + using... + (ia64_gnu_attributes, ia64_gnu_attribute_table): ...these new globals. 
+ * config/rs6000/rs6000.cc (rs6000_attribute_table): Update for above + changes, using... + (rs6000_gnu_attributes, rs6000_gnu_attribute_table): ...these new + globals. + +gcc/ada/ + * gcc-interface/gigi.h (gnat_internal_attribute_table): Change + type to scoped_attribute_specs. + * gcc-interface/utils.cc (gnat_internal_attribute_table): Likewise, + using... + (gnat_internal_attributes): ...this as the underlying array. + * gcc-interface/misc.cc (gnat_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + +gcc/c-family/ + * c-common.h (c_common_attribute_table): Replace with... + (c_common_gnu_attribute_table): ...this. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs. + * c-attribs.cc (c_common_attribute_table): Replace with... + (c_common_gnu_attributes, c_common_gnu_attribute_table): ...these + new globals. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs, using... + (c_common_format_attributes): ...this as the underlying array. + +gcc/c/ + * c-tree.h (std_attribute_table): Declare. + * c-decl.cc (std_attribute_table): Change type to + scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (c_init_decl_processing): Remove call to register_scoped_attributes. + * c-objc-common.h (c_objc_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + +gcc/cp/ + * cp-tree.h (cxx_attribute_table): Delete. + (cxx_gnu_attribute_table, std_attribute_table): Declare. + * cp-objcp-common.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + (cp_objcp_attribute_table): New table. + (LANG_HOOKS_ATTRIBUTE_TABLE): Redefine. + * tree.cc (cxx_attribute_table): Replace with... + (cxx_gnu_attributes, cxx_gnu_attribute_table): ...these globals. + (std_attribute_table): Change type to scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (init_tree): Remove call to register_scoped_attributes. + +gcc/d/ + * d-tree.h (d_langhook_attribute_table): Replace with... + (d_langhook_gnu_attribute_table): ...this. + (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs. + * d-attribs.cc (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs, using... + (d_langhook_common_attributes): ...this as the underlying array. + (d_langhook_attribute_table): Replace with...
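The new layout described above is easier to grasp in miniature. The sketch below models the commit message's description with illustrative types; the names and fields are ours, not GCC's exact declarations:

// Minimal model of the new shape: each table carries its namespace, and
// the target/langhook exposes an array of such tables.  Illustrative only.
#include <cstddef>

struct attribute_spec_model
{
  const char *name;        // e.g. "streaming"
  int min_args, max_args;
  // handler, flags, exclusions elided ...
};

struct scoped_attribute_specs_model
{
  const char *ns;                      // "gnu", "arm", or null for [[...]]
  const attribute_spec_model *specs;   // attributes in that namespace
  std::size_t count;                   // array_slice stands in for ptr+len
};

// A target wanting only GNU attributes registers a single scoped table:
static const attribute_spec_model my_gnu_attrs[] = {
  { "noinline_model", 0, 0 },
};
static const scoped_attribute_specs_model my_gnu_table = {
  "gnu", my_gnu_attrs, sizeof my_gnu_attrs / sizeof my_gnu_attrs[0]
};

// ...and the hook returns an array of tables, so several namespaces
// (for example a vendor "arm" table) can coexist:
static const scoped_attribute_specs_model *const my_attribute_tables[] = {
  &my_gnu_table,
};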
View file
_service:tar_scm:0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch
Added
@@ -0,0 +1,45 @@ +From 9503e64bf304d44947791d9ff17d65a6905e59ce Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 28 Jun 2024 15:04:26 +0800 +Subject: PATCH 172/188 LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c + tests. + +After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX. +Cause these two tests to fail. To guarantee that these two tests pass, +add the compilation option '-fno-late-combine-instructions'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: + Add compilation options '-fno-late-combine-instructions'. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise. +--- + .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +- + gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +index 3797556e1..e9eb0d6f7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +index f66903091..fed478458 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +-- +2.43.0 +
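For context, a sketch of the kind of TLS access these tests scan (illustrative source; the described fusion is a reading of the commit message, not verified output):

/* With -mtls-dialect=desc the descriptor call yields a TLS offset that is
   added to the thread pointer with add.d and dereferenced with ld.w; the
   new late-combine pass may fuse that add/load pair into a single ldx.w,
   which is the rewrite -fno-late-combine-instructions suppresses so the
   scan-assembler patterns stay stable.  */
extern __thread int counter;

int
read_counter (void)
{
  return counter;
}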
View file
_service:tar_scm:0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
Added
@@ -0,0 +1,64 @@ +From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Thu, 22 Dec 2022 11:15:47 +0100 +Subject: PATCH 074/157 BackportSME aarch64: Fix plugin header install + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d + +The r13-2943-g11a113d501ff64 made aarch64.h include +aarch64-option-extensions.def, but that file isn't installed +for building plugins. + +On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote: +> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def) +> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be +> for aarch64-opt.h (included via aarch64.opt). +> +> I guess TM_H should also have aarch64-arches.def, since it's included +> for aarch64_feature. + +gcc/Makefile.in has +TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) +and +OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA) +which means that adding something into TM_H when it is already in +OPTIONS_H_EXTRA is a unnecessary. +It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h) +and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def +(included by aarch64.h) aren't needed for options.h, so I think the +right patch would be following. + +2022-12-22 Jakub Jelinek <jakub@redhat.com> + + * config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def, + add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and + aarch64-option-extensions.def. + (OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor + aarch64-tuning-flags.def. +--- + gcc/config/aarch64/t-aarch64 | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index ba74abc0a..6a21a248f 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ b/gcc/config/aarch64/t-aarch64 +@@ -18,11 +18,11 @@ + # along with GCC; see the file COPYING3. If not see + # <http://www.gnu.org/licenses/>. + +-TM_H += $(srcdir)/config/aarch64/aarch64-cores.def ++TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ ++ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ ++ $(srcdir)/config/aarch64/aarch64-option-extensions.def + OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ +- $(srcdir)/config/aarch64/aarch64-arches.def \ +- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ +- $(srcdir)/config/aarch64/aarch64-tuning-flags.def ++ $(srcdir)/config/aarch64/aarch64-arches.def + + $(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true + s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \ +-- +2.33.0 +
View file
_service:tar_scm:0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch
Added
@@ -0,0 +1,70 @@
+From 727b1a2cff9cecd904545895bbf39a89fbf1ea4f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 28 Jun 2024 15:09:48 +0800
+Subject: [PATCH 173/188] LoongArch: Define loongarch_insn_cost and set the
+ cost of movcf2gr and movgr2cf.
+
+The following two FAIL items have been fixed:
+
+FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movcf2fr\\t\\\\\$f\[0-9\]+,\\\\\$fcc
+FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movfr2gr\\\\.s\\t\\\\\$r4
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_insn_cost):
+	New function.
+	(TARGET_INSN_COST): New macro.
+---
+ gcc/config/loongarch/loongarch.cc | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 35524b5da..958e82b86 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -4369,6 +4369,33 @@ loongarch_address_cost (rtx addr, machine_mode mode,
+   return loongarch_address_insns (addr, mode, false);
+ }
+ 
++/* Implement TARGET_INSN_COST.  */
++
++static int
++loongarch_insn_cost (rtx_insn *insn, bool speed)
++{
++  rtx x = PATTERN (insn);
++  int cost = pattern_cost (x, speed);
++
++  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
++  if (GET_CODE (x) == SET
++      && GET_MODE (XEXP (x, 0)) == FCCmode)
++    {
++      rtx dest, src;
++      dest = XEXP (x, 0);
++      src = XEXP (x, 1);
++
++      if (REG_P (dest) && REG_P (src))
++	{
++	  if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
++	    cost = loongarch_cost->movcf2gr;
++	  else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
++	    cost = loongarch_cost->movgr2cf;
++	}
++    }
++  return cost;
++}
++
+ /* Return one word of double-word value OP, taking into account the fixed
+    endianness of certain registers.  HIGH_P is true to select the high part,
+    false to select the low part.  */
+@@ -11089,6 +11116,8 @@ loongarch_asm_code_end (void)
+ #define TARGET_RTX_COSTS loongarch_rtx_costs
+ #undef TARGET_ADDRESS_COST
+ #define TARGET_ADDRESS_COST loongarch_address_cost
++#undef TARGET_INSN_COST
++#define TARGET_INSN_COST loongarch_insn_cost
+ #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+ #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+   loongarch_builtin_vectorization_cost
+-- 
+2.43.0
+
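A sketch of source that exercises this path (illustrative, not the testsuite file): a floating-point compare produces its result in an FCC register, and the integer result must end up in a general-purpose register:

/* On LA464 the transfer should stay as movcf2fr + movfr2gr.s (through an
   FP register) rather than being merged into the slower movcf2gr; steering
   that choice is exactly what the new insn-cost hook does.  */
int
fcc_to_gpr (float a, float b)
{
  return a < b;   /* fcmp sets an $fcc register; the result needs $r4 */
}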
View file
_service:tar_scm:0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
Added
@@ -0,0 +1,1178 @@ +From 70b732b4518dd0e44b9e6bfaaad78492b8db8f29 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 075/157 BackportSME aarch64: Add + arm_streaming(_compatible) attributes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2c9a54b4238308b127c3b60b01a591363131e7db + +This patch adds support for recognising the SME arm::streaming +and arm::streaming_compatible attributes. These attributes +respectively describe whether the processor is definitely in +"streaming mode" (PSTATE.SM==1), whether the processor is +definitely not in streaming mode (PSTATE.SM==0), or whether +we don't know at compile time either way. + +As far as the compiler is concerned, this effectively creates three +ISA submodes: streaming mode enables things that are not available +in non-streaming mode, non-streaming mode enables things that not +available in streaming mode, and streaming-compatible mode has to stick +to the common subset. This means that some instructions are conditional +on PSTATE.SM==1 and some are conditional on PSTATE.SM==0. + +I wondered about recording the streaming state in a new variable. +However, the set of available instructions is also influenced by +PSTATE.ZA (added later), so I think it makes sense to view this +as an instance of a more general mechanism. Also, keeping the +PSTATE.SM state in the same flag variable as the other ISA +features makes it possible to sum up the requirements of an +ACLE function in a single value. + +The patch therefore adds a new set of feature flags called "ISA modes". +Unlike the other two sets of flags (optional features and architecture- +level features), these ISA modes are not controlled directly by +command-line parameters or "target" attributes. + +arm::streaming and arm::streaming_compatible are function type attributes +rather than function declaration attributes. This means that we need +to find somewhere to copy the type information across to a function's +target options. The patch does this in aarch64_set_current_function. + +We also need to record which ISA mode a callee expects/requires +to be active on entry. (The same mode is then active on return.) +The patch extends the current UNSPEC_CALLEE_ABI cookie to include +this information, as well as the PCS variant that it recorded +previously. + +The attributes can also be written __arm_streaming and +__arm_streaming_compatible. This has two advantages: it triggers +an error on compilers that don't understand the attributes, and it +eases use on C, where ... attributes were only added in C23. + +gcc/ + * config/aarch64/aarch64-isa-modes.def: New file. + * config/aarch64/aarch64.h: Include it in the feature enumerations. + (AARCH64_FL_SM_STATE, AARCH64_FL_ISA_MODES): New constants. + (AARCH64_FL_DEFAULT_ISA_MODE): Likewise. + (AARCH64_ISA_MODE): New macro. + (CUMULATIVE_ARGS): Add an isa_mode field. + * config/aarch64/aarch64-protos.h (aarch64_gen_callee_cookie): Declare. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + * config/aarch64/aarch64.cc (attr_streaming_exclusions) + (aarch64_gnu_attributes, aarch64_gnu_attribute_table) + (aarch64_arm_attributes, aarch64_arm_attribute_table): New tables. + (aarch64_attribute_table): Redefine to include the gnu and arm + attributes. + (aarch64_fntype_pstate_sm, aarch64_fntype_isa_mode): New functions. + (aarch64_fndecl_pstate_sm, aarch64_fndecl_isa_mode): Likewise. + (aarch64_gen_callee_cookie, aarch64_callee_abi): Likewise. 
+ (aarch64_insn_callee_cookie, aarch64_insn_callee_abi): Use them. + (aarch64_function_arg, aarch64_output_mi_thunk): Likewise. + (aarch64_init_cumulative_args): Initialize the isa_mode field. + (aarch64_output_mi_thunk): Use aarch64_gen_callee_cookie to get + the ABI cookie. + (aarch64_override_options): Add the ISA mode to the feature set. + (aarch64_temporary_target::copy_from_fndecl): Likewise. + (aarch64_fndecl_options, aarch64_handle_attr_arch): Likewise. + (aarch64_set_current_function): Maintain the correct ISA mode. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + (aarch64_comp_type_attributes): Handle arm::streaming and + arm::streaming_compatible. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_streaming and __arm_streaming_compatible. + * config/aarch64/aarch64.md (tlsdesc_small_<mode>): Use + aarch64_gen_callee_cookie to get the ABI cookie. + * config/aarch64/t-aarch64 (TM_H): Add all feature-related .def files. + +gcc/testsuite/ + * gcc.target/aarch64/sme/aarch64-sme.exp: New harness. + * gcc.target/aarch64/sme/streaming_mode_1.c: New test. + * gcc.target/aarch64/sme/streaming_mode_2.c: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme.exp: New harness. + * g++.target/aarch64/sme/streaming_mode_1.C: New test. + * g++.target/aarch64/sme/streaming_mode_2.C: Likewise. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. + * gcc.target/aarch64/auto-init-1.c: Only expect the call insn + to contain 1 (const_int 0), not 2. +--- + gcc/config/aarch64/aarch64-c.cc | 14 ++ + gcc/config/aarch64/aarch64-isa-modes.def | 35 +++ + gcc/config/aarch64/aarch64-protos.h | 3 +- + gcc/config/aarch64/aarch64.cc | 233 +++++++++++++++--- + gcc/config/aarch64/aarch64.h | 24 +- + gcc/config/aarch64/aarch64.md | 3 +- + gcc/config/aarch64/t-aarch64 | 5 +- + .../g++.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../g++.target/aarch64/sme/keyword_macros_1.C | 4 + + .../g++.target/aarch64/sme/streaming_mode_1.C | 142 +++++++++++ + .../g++.target/aarch64/sme/streaming_mode_2.C | 25 ++ + .../gcc.target/aarch64/auto-init-1.c | 3 +- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../gcc.target/aarch64/sme/keyword_macros_1.c | 4 + + .../gcc.target/aarch64/sme/streaming_mode_1.c | 130 ++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_2.c | 25 ++ + 16 files changed, 685 insertions(+), 45 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-isa-modes.def + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 4085ad840..397745fbd 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -72,6 +72,20 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); + + builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); ++ ++ /* Define keyword attributes like 
__arm_streaming as macros that expand
++   to the associated [[...]] attribute.  Use __extension__ in the attribute
++   for C, since the [[...]] syntax was only added in C23.  */
++#define DEFINE_ARM_KEYWORD_MACRO(NAME) \
++  builtin_define_with_value ("__arm_" NAME, \
++			     lang_GNU_CXX () \
++			     ? "[[arm::" NAME "]]" \
++			     : "[[__extension__ arm::" NAME "]]", 0);
++
++  DEFINE_ARM_KEYWORD_MACRO ("streaming");
++  DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
++
++#undef DEFINE_ARM_KEYWORD_MACRO
+ }
+ 
+ /* Undefine/redefine macros that depend on the current backend state and may
+diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def
+new file mode 100644
+index 000000000..5915c98a8
+--- /dev/null
++++ b/gcc/config/aarch64/aarch64-isa-modes.def
+@@ -0,0 +1,35 @@
++/* Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file defines a set of "ISA modes"; in other words, it defines
++   various bits of runtime state that control the set of available
++   instructions or that affect the semantics of instructions in some way.
++
++   Before using #include to read this file, define a macro:
++
++      DEF_AARCH64_ISA_MODE(NAME)
++
++   where NAME is the name of the mode.  */
++
++/* Indicates that PSTATE.SM is known to be 1 or 0 respectively.  These
++   modes are mutually exclusive.  If neither mode is active then the state
++   of PSTATE.SM is not known at compile time.  */
++DEF_AARCH64_ISA_MODE(SM_ON)
++DEF_AARCH64_ISA_MODE(SM_OFF)
++
++#undef DEF_AARCH64_ISA_MODE
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 14a568140..9b03410dc 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -772,6 +772,7 @@ bool aarch64_constant_address_p (rtx);
+ bool aarch64_emit_approx_div (rtx, rtx, rtx);
+ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
+ tree aarch64_vector_load_decl (tree);
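A hypothetical usage sketch, assuming a toolchain carrying these patches; the keyword macros above expand to the corresponding [[arm::...]] attributes on the function type:

/* Declarations using the keyword macros defined by this patch.  */
void fn_streaming (void) __arm_streaming;
void fn_streaming_compatible (void) __arm_streaming_compatible;

void
fn_streaming_compatible (void) __arm_streaming_compatible
{
  /* This body may execute with PSTATE.SM either 0 or 1, so only the
     common streaming-compatible subset of instructions is allowed.  */
}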
View file
_service:tar_scm:0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch
Added
@@ -0,0 +1,73 @@ +From b6b917847a25afbaba9983e80b62b69ed3ce3983 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 10:37:26 +0800 +Subject: PATCH 174/188 LoongArch: TFmode is not allowed to be stored in the + float register. + + PR target/115752 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Replace + UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE. + * config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr115752.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.h | 7 ------- + gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 ++++++++ + 3 files changed, 9 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 958e82b86..b78512e0e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6760,7 +6760,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; ++ return size <= UNITS_PER_HWFPVALUE; + + /* Allow integer modes that fit into a single register. We need + to put integers into FPRs when using instructions like CVT +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6743d2684..a23dabde1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3. If not see + #define UNITS_PER_HWFPVALUE \ + (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG) + +-/* The largest size of value that can be held in floating-point +- registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) +- + /* The number of bytes in a double. */ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c b/gcc/testsuite/gcc.target/loongarch/pr115752.c +new file mode 100644 +index 000000000..df4bae524 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++long double ++test (long double xx) ++{ ++ __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */ ++ return xx + 1; ++} +-- +2.43.0 +
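Put differently: long double on LoongArch64 is a 128-bit TFmode value, while a hardware floating-point register holds at most UNITS_PER_HWFPVALUE (64 bits), so TFmode values live in general-register pairs and only the FPR asm constraint in the new test is rejected. A sketch of what still compiles (our example; the libcall name is an assumption):

/* TFmode arithmetic remains valid; it goes through general registers and
   soft-fp libcalls (presumably __addtf3 here) instead of FP registers.  */
long double
add_one (long double x)
{
  return x + 1.0L;
}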
View file
_service:tar_scm:0175-Backport-SME-aarch64-Add-sme.patch
Added
@@ -0,0 +1,330 @@ +From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 076/157 BackportSME aarch64: Add +sme + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c + +This patch adds the +sme ISA feature and requires it to be present +when compiling arm_streaming code. (arm_streaming_compatible code +does not necessarily assume the presence of SME. It just has to +work when SME is present and streaming mode is enabled.) + +gcc/ + * doc/invoke.texi: Document SME. + * doc/sourcebuild.texi: Document aarch64_sve. + * config/aarch64/aarch64-option-extensions.def (sme): Define. + * config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro. + (TARGET_SME): Likewise. + * config/aarch64/aarch64.cc (aarch64_override_options_internal): + Ensure that SME is present when compiling streaming code. + +gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_aarch64_sme): New + target test. + * gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled + if it isn't by default. + * g++.target/aarch64/sme/aarch64-sme.exp: Likewise. + * gcc.target/aarch64/sme/streaming_mode_3.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 2 + + gcc/config/aarch64/aarch64.cc | 33 ++++++++++ + gcc/config/aarch64/aarch64.h | 5 ++ + gcc/doc/invoke.texi | 2 + + gcc/doc/sourcebuild.texi | 2 + + .../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++ + gcc/testsuite/lib/target-supports.exp | 12 ++++ + 10 files changed, 157 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index bdf4baf30..faee64a79 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") + + AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + ++AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") ++ + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 904166b21..8f8395201 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + return true; + } + ++/* Implement TARGET_START_CALL_ARGS. */ ++ ++static void ++aarch64_start_call_args (cumulative_args_t ca_v) ++{ ++ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); ++ ++ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON)) ++ { ++ error ("calling a streaming function requires the ISA extension %qs", ++ "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ } ++} ++ + /* This function is used by the call expanders of the machine description. + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". 
+@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts) + && !fixed_regsR18_REGNUM) + error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); + ++ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) ++ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) ++ { ++ error ("streaming functions require the ISA extension %qs", "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ auto new_flags = (opts->x_aarch64_asm_isa_flags ++ | feature_deps::SME ().enable); ++ aarch64_set_asm_isa_flags (opts, new_flags); ++ } ++ + initialize_aarch64_code_model (opts); + initialize_aarch64_tls_size (opts); + +@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_FUNCTION_VALUE_REGNO_P + #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p + ++#undef TARGET_START_CALL_ARGS ++#define TARGET_START_CALL_ARGS aarch64_start_call_args ++ + #undef TARGET_GIMPLE_FOLD_BUILTIN + #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 84215c8c3..dd2de4e88 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) ++#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) + #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) +@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ + #define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) + ++/* SME instructions, enabled through +sme. Note that this does not ++ imply anything about the state of PSTATE.SM. */ ++#define TARGET_SME (AARCH64_ISA_SME) ++ + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 53709b246..2420b05d9 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy}, + Enable the Flag Manipulation instructions Extension. + @item pauth + Enable the Pointer Authentication Extension. ++@item sme ++Enable the Scalable Matrix Extension. + + @end table + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 454fae11a..80936a0eb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian. + @item aarch64_small_fpic + Binutils installed on test system supports relocation types required by -fpic + for AArch64 small memory model. ++@item aarch64_sme ++AArch64 target that generates instructions for SME. + @item aarch64_sve_hw + AArch64 target that is able to generate and execute SVE code (regardless of + whether it does so by default). 
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +index 72fcd0bd9..1c3e69cde 100644 +--- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp ++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +@@ -30,10 +30,16 @@ load_lib g++-dg.exp + # Initialize `dg'. + dg-init + +-aarch64-with-arch-dg-options "" { ++if { check_effective_target_aarch64_sme } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++ ++aarch64-with-arch-dg-options $sme_flags { + # Main loop. + dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.\cCS\ \ +- "" "" ++ "" $sme_flags + } + + # All done. +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp +index c990e5924..011310e80 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp ++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp +@@ -30,10 +30,16 @@ load_lib gcc-dg.exp + # Initialize `dg'. + dg-init + +-aarch64-with-arch-dg-options "" { ++if { check_effective_target_aarch64_sme } { ++ set sme_flags "" ++} else { ++ set sme_flags "-march=armv9-a+sme" ++} ++
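The user-visible effect is easy to demonstrate (hypothetical invocation, using the diagnostics added by this patch): compiling a streaming function with plain -march=armv9-a should be rejected, while -march=armv9-a+sme accepts it:

/* sme-demo.c: without +sme this is rejected with
   "streaming functions require the ISA extension 'sme'".  */
void
needs_sme (void) __arm_streaming
{
}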
View file
_service:tar_scm:0175-LoongArch-Remove-unreachable-codes.patch
Added
@@ -0,0 +1,249 @@ +From a75609d90f506709e4af26701aa4fb6adce00700 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 15:00:40 +0800 +Subject: PATCH 175/188 LoongArch: Remove unreachable codes. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_split_move): Delete. + (loongarch_hard_regno_mode_ok_uncached): Likewise. + * config/loongarch/loongarch.md + (move_doubleword_fpr<mode>): Likewise. + (load_low<mode>): Likewise. + (load_high<mode>): Likewise. + (store_word<mode>): Likewise. + (movgr2frh<mode>): Likewise. + (movfrh2gr<mode>): Likewise. +--- + gcc/config/loongarch/loongarch.cc | 47 +++---------- + gcc/config/loongarch/loongarch.md | 109 ------------------------------ + 2 files changed, 8 insertions(+), 148 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b78512e0e..260dd7b5f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4459,42 +4459,13 @@ loongarch_split_move_p (rtx dest, rtx src) + void + loongarch_split_move (rtx dest, rtx src) + { +- rtx low_dest; +- + gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_256bit_move (dest, src); +- else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) +- { +- if (!TARGET_64BIT && GET_MODE (dest) == DImode) +- emit_insn (gen_move_doubleword_fprdi (dest, src)); +- else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) +- emit_insn (gen_move_doubleword_fprdf (dest, src)); +- else if (TARGET_64BIT && GET_MODE (dest) == TFmode) +- emit_insn (gen_move_doubleword_fprtf (dest, src)); +- else +- gcc_unreachable (); +- } + else +- { +- /* The operation can be split into two normal moves. Decide in +- which order to do them. */ +- low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) +- { +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- } +- else +- { +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- } +- } ++ gcc_unreachable (); + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -6743,20 +6714,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) ++ if (GP_REG_P (regno) ++ && !LSX_SUPPORTED_MODE_P (mode) + && !LASX_SUPPORTED_MODE_P (mode)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- + if (FP_REG_P (regno)) + { ++ /* Allow 128-bit or 256-bit vector modes in all FPR. 
*/ ++ if (LSX_SUPPORTED_MODE_P (mode) ++ || LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 55a759850..16f9f37c8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,9 +400,6 @@ + ;; 64-bit modes for which we provide move patterns. + (define_mode_iterator MOVE64 DI DF) + +-;; 128-bit modes for which we provide move patterns on 64-bit targets. +-(define_mode_iterator MOVE128 TI TF) +- + ;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT QI HI) + +@@ -421,12 +418,6 @@ + (define_mode_iterator ANYFI (SI "TARGET_HARD_FLOAT") + (DI "TARGET_DOUBLE_FLOAT")) + +-;; A mode for which moves involving FPRs may need to be split. +-(define_mode_iterator SPLITF +- (DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")) +- + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. + (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) +@@ -2421,41 +2412,6 @@ + (set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")) + +-;; Emit a doubleword move in which exactly one of the operands is +-;; a floating-point register. We can't just emit two normal moves +-;; because of the constraints imposed by the FPU register model; +-;; see loongarch_can_change_mode_class for details. Instead, we keep +-;; the FPR whole and use special patterns to refer to each word of +-;; the other operand. +- +-(define_expand "move_doubleword_fpr<mode>" +- (set (match_operand:SPLITF 0) +- (match_operand:SPLITF 1)) +- "" +-{ +- if (FP_REG_RTX_P (operands0)) +- { +- rtx low = loongarch_subword (operands1, 0); +- rtx high = loongarch_subword (operands1, 1); +- emit_insn (gen_load_low<mode> (operands0, low)); +- if (!TARGET_64BIT) +- emit_insn (gen_movgr2frh<mode> (operands0, high, operands0)); +- else +- emit_insn (gen_load_high<mode> (operands0, high, operands0)); +- } +- else +- { +- rtx low = loongarch_subword (operands0, 0); +- rtx high = loongarch_subword (operands0, 1); +- emit_insn (gen_store_word<mode> (low, operands1, const0_rtx)); +- if (!TARGET_64BIT) +- emit_insn (gen_movfrh2gr<mode> (high, operands1)); +- else +- emit_insn (gen_store_word<mode> (high, operands1, const1_rtx)); +- } +- DONE; +-}) +- + ;; Clear one FCC register + + (define_expand "movfcc" +@@ -2742,49 +2698,6 @@ + (set_attr "type" "fcvt") + (set_attr "mode" "<ANYF:MODE>")) + +-;; Load the low word of operand 0 with operand 1. +-(define_insn "load_low<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- UNSPEC_LOAD_LOW)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 0); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +- +-;; Load the high word of operand 0 from operand 1, preserving the value +-;; in the low word. 
+-(define_insn "load_high<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- (match_operand:SPLITF 2 "register_operand" "0,0") +- UNSPEC_LOAD_HIGH)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 1); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +-
View file
_service:tar_scm:0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
Added
@@ -0,0 +1,168 @@
+From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
+From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+Date: Thu, 1 Jun 2023 09:37:06 +0100
+Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
+ alternatives to 64-bit vector move patterns
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
+
+We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives
+to the mov patterns.  This straightforward patch does that and for the pair variants too.
+For the testcase in the code we now generate the optimal assembly without any superfluous
+GP<->SIMD moves.
+
+Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
+
+gcc/ChangeLog:
+
+	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
+	Add =r,m and =m,r alternatives.
+	(load_pair<DREG:mode><DREG2:mode>): Likewise.
+	(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
+---
+ gcc/config/aarch64/aarch64-simd.md            | 40 ++++++++++--------
+ .../gcc.target/aarch64/xreg-vec-modes_1.c     | 42 +++++++++++++++++++
+ 2 files changed, 65 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index 2d688edf5..b5c52ba16 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -116,26 +116,28 @@
+ 
+ (define_insn "*aarch64_simd_mov<VDMOV:mode>"
+   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
+-		"=w, m, m, w, ?r, ?w, ?r, w, w")
++		"=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
+	(match_operand:VDMOV 1 "general_operand"
+-		"m, Dz, w, w, w, r, r, Dn, Dz"))]
++		"m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
+   "TARGET_FLOAT
+    && (register_operand (operands[0], <MODE>mode)
+        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
+   "@
+    ldr\t%d0, %1
++   ldr\t%x0, %1
+    str\txzr, %0
+    str\t%d1, %0
++   str\t%x1, %0
+    * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
+    * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
+    fmov\t%d0, %1
+    mov\t%0, %1
+    * return aarch64_output_simd_mov_immediate (operands[1], 64);
+    fmov\t%d0, xzr"
+-  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
+-		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
++  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
++		     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
+		     mov_reg, neon_move<q>, f_mcr")
+-   (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
++   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
+ )
+ 
+ (define_insn "*aarch64_simd_mov<VQMOV:mode>"
+@@ -177,31 +179,35 @@
+ )
+ 
+ (define_insn "load_pair<DREG:mode><DREG2:mode>"
+-  [(set (match_operand:DREG 0 "register_operand" "=w")
+-	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+-   (set (match_operand:DREG2 2 "register_operand" "=w")
+-	(match_operand:DREG2 3 "memory_operand" "m"))]
++  [(set (match_operand:DREG 0 "register_operand" "=w,r")
++	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
++   (set (match_operand:DREG2 2 "register_operand" "=w,r")
++	(match_operand:DREG2 3 "memory_operand" "m,m"))]
+   "TARGET_FLOAT
+    && rtx_equal_p (XEXP (operands[3], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[1], 0),
+				  GET_MODE_SIZE (<DREG:MODE>mode)))"
+-  "ldp\\t%d0, %d2, %z1"
+-  [(set_attr "type" "neon_ldp")]
++  "@
++   ldp\t%d0, %d2, %z1
++   ldp\t%x0, %x2, %z1"
++  [(set_attr "type" "neon_ldp,load_16")]
+ )
+ 
+ (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+-  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+-	(match_operand:DREG 1 "register_operand" "w"))
+-   (set (match_operand:DREG2 2 "memory_operand" "=m")
+-	(match_operand:DREG2 3 "register_operand" "w"))]
++  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
++	(match_operand:DREG 1 "register_operand" "w,r"))
++   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
++	(match_operand:DREG2 3 "register_operand" "w,r"))]
+   "TARGET_FLOAT
+    && rtx_equal_p (XEXP (operands[2], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[0], 0),
+				  GET_MODE_SIZE (<DREG:MODE>mode)))"
+-  "stp\\t%d1, %d3, %z0"
+-  [(set_attr "type" "neon_stp")]
++  "@
++   stp\t%d1, %d3, %z0
++   stp\t%x1, %x3, %z0"
++  [(set_attr "type" "neon_stp,store_16")]
+ )
+ 
+ (define_insn "load_pair<VQ:mode><VQ2:mode>"
+diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+new file mode 100644
+index 000000000..fc4dcb1ad
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++typedef unsigned int v2si __attribute__((vector_size (8)));
++
++#define force_gp(V1) asm volatile ("" \
++			  : "=r"(V1) \
++			  : "r"(V1) \
++			  : /* No clobbers */);
++
++/*
++** foo:
++**	ldr	(x[0-9]+), \[x1\]
++**	str	\1, \[x0\]
++**	ret
++*/
++
++void
++foo (v2si *a, v2si *b)
++{
++  v2si tmp = *b;
++  force_gp (tmp);
++  *a = tmp;
++}
++
++/*
++** foo2:
++**	ldp	(x[0-9]+), (x[0-9]+), \[x0\]
++**	stp	\1, \2, \[x1\]
++**	ret
++*/
++void
++foo2 (v2si *a, v2si *b)
++{
++  v2si t1 = *a;
++  v2si t2 = a[1];
++  force_gp (t1);
++  force_gp (t2);
++  *b = t1;
++  b[1] = t2;
++}
+-- 
+2.33.0
+
_service:tar_scm:0176-LoongArch-Organize-the-code-related-to-split-move-an.patch
Added
@@ -0,0 +1,413 @@ +From 95089699271d235efc29ae48b78f8c7f1b6386c4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 12 Jul 2024 09:57:40 +0800 +Subject: PATCH 176/188 LoongArch: Organize the code related to split move + and merge the same functions. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_split_128bit_move): Delete. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_vector_move): Add a function declaration. + * config/loongarch/loongarch.cc + (loongarch_vector_costs::finish_cost): Adjust the code + formatting. + (loongarch_split_vector_move_p): Merge + loongarch_split_128bit_move_p and loongarch_split_256bit_move_p. + (loongarch_split_move_p): Merge code. + (loongarch_split_move): Likewise. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_128bit_move): Delete. + (loongarch_split_vector_move): Merge loongarch_split_128bit_move + and loongarch_split_256bit_move. + (loongarch_split_256bit_move): Delete. + (loongarch_global_init): Remove the extra semicolon at the + end of the function. + * config/loongarch/loongarch.md (*movdf_softfloat): Added a new + condition TARGET_64BIT. +--- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/loongarch.cc | 221 ++++++------------------ + gcc/config/loongarch/loongarch.md | 1 + + 3 files changed, 58 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 0c31a74b7..abf1a0893 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern void loongarch_split_128bit_move (rtx, rtx); +-extern bool loongarch_split_128bit_move_p (rtx, rtx); +-extern void loongarch_split_256bit_move (rtx, rtx); +-extern bool loongarch_split_256bit_move_p (rtx, rtx); ++extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 260dd7b5f..53bd8d7ec 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4351,10 +4351,10 @@ void + loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); ++ + if (loop_vinfo) +- { +- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); +- } ++ m_suggested_unroll_factor ++ = determine_suggested_unroll_factor (loop_vinfo); + + vector_costs::finish_cost (scalar_costs); + } +@@ -4420,6 +4420,7 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + ++static bool loongarch_split_vector_move_p (rtx dest, rtx src); + /* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. */ + +@@ -4441,13 +4442,11 @@ loongarch_split_move_p (rtx dest, rtx src) + return false; + } + +- /* Check if LSX moves need splitting. 
*/ +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_128bit_move_p (dest, src); + +- /* Check if LASX moves need splitting. */ +- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_256bit_move_p (dest, src); ++ /* Check if vector moves need splitting. */ ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ return loongarch_split_vector_move_p (dest, src); + + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +@@ -4460,10 +4459,9 @@ void + loongarch_split_move (rtx dest, rtx src) + { + gcc_checking_assert (loongarch_split_move_p (dest, src)); +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_128bit_move (dest, src); +- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ loongarch_split_vector_move (dest, src); + else + gcc_unreachable (); + } +@@ -4585,224 +4583,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) + + return insnldrindex-2; + } +-/* Return true if a 128-bit move from SRC to DEST should be split. */ +- +-bool +-loongarch_split_128bit_move_p (rtx dest, rtx src) +-{ +- /* LSX-to-LSX moves can be done in a single instruction. */ +- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) +- return false; +- +- /* Check for LSX loads and stores. */ +- if (FP_REG_RTX_P (dest) && MEM_P (src)) +- return false; +- if (FP_REG_RTX_P (src) && MEM_P (dest)) +- return false; +- +- /* Check for LSX set to an immediate const vector with valid replicated +- element. */ +- if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) +- return false; +- +- /* Check for LSX load zero immediate. */ +- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) +- return false; +- +- return true; +-} +- +-/* Return true if a 256-bit move from SRC to DEST should be split. */ ++/* Return true if a vector move from SRC to DEST should be split. */ + +-bool +-loongarch_split_256bit_move_p (rtx dest, rtx src) ++static bool ++loongarch_split_vector_move_p (rtx dest, rtx src) + { +- /* LSX-to-LSX moves can be done in a single instruction. */ ++ /* Vector moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + +- /* Check for LSX loads and stores. */ ++ /* Check for vector loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + +- /* Check for LSX set to an immediate const vector with valid replicated ++ /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + +- /* Check for LSX load zero immediate. */ ++ /* Check for vector load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; + } + +-/* Split a 128-bit move from SRC to DEST. */ ++/* Split a vector move from SRC to DEST. 
*/ + + void +-loongarch_split_128bit_move (rtx dest, rtx src) ++loongarch_split_vector_move (rtx dest, rtx src) + { + int byte, index; +- rtx low_dest, low_src, d, s; ++ rtx s, d; ++ machine_mode mode = GET_MODE (dest); ++ bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + +- rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V4SImode)
_service:tar_scm:0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
Added
@@ -0,0 +1,167 @@ +From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Wed, 18 Oct 2023 09:34:01 +0100 +Subject: PATCH 078/157 BackportSME AArch64: Rewrite simd move immediate + patterns to new syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa + +This rewrites the simd MOV patterns to use the new compact syntax. +No change in semantics is expected. This will be needed in follow on patches. + +This also merges the splits into the define_insn which will also be needed soon. + +gcc/ChangeLog: + + PR tree-optimization/109154 + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Rewrite to new syntax. + (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in + splits. +--- + gcc/config/aarch64/aarch64-simd.md | 116 ++++++++++++----------------- + 1 file changed, 47 insertions(+), 69 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index b5c52ba16..1f4b30642 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -115,54 +115,59 @@ + ) + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" +- (set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VDMOV 1 "general_operand" +- "m, m, Dz, w, r, w, w, r, r, Dn, Dz")) ++ (set (match_operand:VDMOV 0 "nonimmediate_operand") ++ (match_operand:VDMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%d0, %1 +- ldr\t%x0, %1 +- str\txzr, %0 +- str\t%d1, %0 +- str\t%x1, %0 +- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; +- * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; +- fmov\t%d0, %1 +- mov\t%0, %1 +- * return aarch64_output_simd_mov_immediate (operands1, 64); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\ +- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\ +- mov_reg, neon_move<q>, f_mcr") +- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*") +-) +- +-(define_insn "*aarch64_simd_mov<VQMOV:mode>" +- (set (match_operand:VQMOV 0 "nonimmediate_operand" +- "=w, Umn, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VQMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn, Dz")) ++ {@ cons: =0, 1; attrs: type, arch ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr ++ } ++) ++ ++(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" ++ (set (match_operand:VQMOV 0 "nonimmediate_operand") ++ (match_operand:VQMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%q0, %1 +- stp\txzr, xzr, %0 +- str\t%q1, %0 +- mov\t%0.<Vbtype>, %1.<Vbtype> +- # +- # +- # +- * return aarch64_output_simd_mov_immediate (operands1, 128); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ +- neon_logic<q>, multiple, multiple,\ +- multiple, neon_move<q>, fmov") +- (set_attr "length" "4,4,4,4,8,8,8,4,4") +- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*") ++ {@ cons: =0, 1; attrs: type, arch, length ++ w , m ; neon_load1_1reg<q> , * , 4 ldr\t%q0, %1 ++ Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 ++ m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 ++ w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ ?r , w ; multiple , * , 8 # ++ ?w , r ; multiple , * , 8 # ++ ?r , r ; multiple , * , 8 # ++ w , Dn; neon_move<q> , simd, 4 << aarch64_output_simd_mov_immediate (operands1, 128); ++ w , Dz; fmov , * , 4 fmov\t%d0, xzr ++ } ++ "&& reload_completed ++ && (REG_P (operands0) ++ && REG_P (operands1) ++ && !(FP_REGNUM_P (REGNO (operands0)) ++ && FP_REGNUM_P (REGNO (operands1))))" ++ (const_int 0) ++ { ++ if (GP_REGNUM_P (REGNO (operands0)) ++ && GP_REGNUM_P (REGNO (operands1))) ++ aarch64_simd_emit_reg_reg_move (operands, DImode, 2); ++ else ++ aarch64_split_simd_move (operands0, operands1); ++ DONE; ++ } + ) + + ;; When storing lane zero we can use the normal STR and its more permissive +@@ -238,33 +243,6 @@ + (set_attr "type" "neon_stp_q") + ) + +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && GP_REGNUM_P (REGNO (operands0)) +- && GP_REGNUM_P (REGNO (operands1))" +- (const_int 0) +-{ +- aarch64_simd_emit_reg_reg_move (operands, DImode, 2); +- DONE; +-}) +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && ((FP_REGNUM_P (REGNO (operands0)) && GP_REGNUM_P (REGNO (operands1))) +- || (GP_REGNUM_P (REGNO 
(operands0)) && FP_REGNUM_P (REGNO (operands1))))" +- (const_int 0) +-{ +- aarch64_split_simd_move (operands0, operands1); +- DONE; +-}) +- + (define_expand "@aarch64_split_simd_mov<mode>" + (set (match_operand:VQMOV 0) + (match_operand:VQMOV 1)) +-- +2.33.0 +
_service:tar_scm:0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch
Added
@@ -0,0 +1,364 @@ +From 34c8e935780d43a797e403ca6604115ec393f0e6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 20 Jul 2024 20:38:13 +0800 +Subject: PATCH 177/188 LoongArch: Expand some SImode operations through + "si3_extend" instructions if TARGET_64BIT + +We already had "si3_extend" insns and we hoped the fwprop or combine +passes can use them to remove unnecessary sign extensions. But this +does not always work: for cases like x << 1 | y, the compiler +tends to do + + (sign_extend:DI + (ior:SI (ashift:SI (reg:SI $r4) + (const_int 1)) + (reg:SI $r5))) + +instead of + + (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) + (sign_extend:DI (reg:SI $r5))) + +So we cannot match the ashlsi3_extend instruction here and we get: + + slli.w $r4,$r4,1 + or $r4,$r5,$r4 + slli.w $r4,$r4,0 # <= redundant + jr $r1 + +To eliminate this redundant extension we need to turn SImode shift etc. +to DImode "si3_extend" operations earlier, when we expand the SImode +operation. We are already doing this for addition, now do it for +shifts, rotates, substract, multiplication, division, and modulo as +well. + +The bytepick.w definition for TARGET_64BIT needs to be adjusted so it +won't be undone by the shift expanding. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). + (<optab:any_shift><mode>3, <optab:any_div><mode>3, + sub<mode>3, rotr<mode>3, mul<mode>3): Add a "*" to the insn name + so we can redefine the names with define_expand. + (*<optab:any_shift>si3_extend): Remove "*" so we can use them + in expanders. + (*subsi3_extended, *mulsi3_extended): Likewise, also remove the + trailing "ed" for consistency. + (*<optab:any_div>si3_extended): Add mode for sign_extend to + prevent an ICE using it in expanders. + (shift_w, arith_w): New define_code_iterator. + (<optab:any_w><mode>3): New define_expand. Expand with + <optab:any_w>si3_extend for SImode if TARGET_64BIT. + (<optab:arith_w><mode>3): Likewise. + (mul<mode>3): Expand to mulsi3_extended for SImode if + TARGET_64BIT and ISA_HAS_DIV32. + (<optab:any_div><mode>3): Expand to <optab:any_div>si3_extended + for SImode if TARGET_64BIT. + (rotl<mode>3): Expand to rotrsi3_extend for SImode if + TARGET_64BIT. + (bytepick_w_<bytepick_imm>): Add mode for lshiftrt and ashift. + (bitsize, bytepick_imm, bytepick_w_ashift_amount): New + define_mode_attr. + (bytepick_w_<bytepick_imm>_extend): Adjust for the RTL change + caused by 32-bit shift expanding. Now bytepick_imm only covers + 2 and 3, separate one remaining case to ... + (bytepick_w_1_extend): ... here, new define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise_extend.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 131 +++++++++++++++--- + .../gcc.target/loongarch/bitwise_extend.c | 45 ++++++ + 2 files changed, 154 insertions(+), 22 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 8bcb43042..6915dab0e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -546,6 +546,7 @@ + (define_code_attr optab (ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") ++ (rotatert "rotr") + (ior "ior") + (xor "xor") + (and "and") +@@ -624,6 +625,49 @@ + (48 "6") + (56 "7")) + ++;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT ++;; so the redundant sign extension can be removed if the output is used as ++;; an input of a bitwise operation. Note plus, rotl, and div are handled ++;; separately. ++(define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator arith_w minus mult) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:SI 2 "arith_operand" "rI"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "register_operand" "r"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ + ;; + ;; .................... 
+ ;; +@@ -781,7 +825,7 @@ + (set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")) + +-(define_insn "sub<mode>3" ++(define_insn "*sub<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -791,7 +835,7 @@ + (set_attr "mode" "<MODE>")) + + +-(define_insn "*subsi3_extended" ++(define_insn "subsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") +@@ -818,7 +862,7 @@ + (set_attr "type" "fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "mul<mode>3" ++(define_insn "*mul<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -827,7 +871,7 @@ + (set_attr "type" "imul") + (set_attr "mode" "<MODE>")) + +-(define_insn "*mulsi3_extended" ++(define_insn "mulsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") +@@ -1001,8 +1045,19 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT) + { ++ if (ISA_HAS_DIV32) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extended (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++ + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx rd = gen_reg_rtx (DImode); +@@ -1038,7 +1093,7 @@ + + (define_insn "<optab>si3_extended" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r")
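To make the commit message above concrete, the problematic shape is a 32-bit shift feeding a bitwise operation; a hedged reproduction (the before/after assembly is quoted from the commit text, not re-verified):

/* Before this patch (LoongArch64, -O2) the SImode shift-or ended with a
   redundant sign extension:
       slli.w  $r4,$r4,1
       or      $r4,$r5,$r4
       slli.w  $r4,$r4,0   # redundant
       jr      $r1
   After it, the shift expands through the "si3_extend" pattern, the result
   is already known to be sign-extended, and the extra slli.w disappears.  */
int
shift_or (int x, int y)
{
  return x << 1 | y;
}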
_service:tar_scm:0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
Added
@@ -0,0 +1,34 @@
+From 883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Tue, 20 Jun 2023 08:54:42 +0100
+Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from
+ *mov<mode>_aarch64
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a
+
+I accidentally left a test comment in the final version of the patch.
+This removes the comment.
+
+gcc/ChangeLog:
+
+    * config/aarch64/aarch64.md (*mov<mode>_aarch64): Drop test comment.
+---
+ gcc/config/aarch64/aarch64.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 29a665e45..1ec23fae8 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1213,7 +1213,7 @@
+     [m, r Z ; store_4        , *     ] str<size>\\t%w1, %0
+     [m, w   ; store_4        , *     ] str\t%<size>1, %0
+     [r, w   ; neon_to_gp<q>  , simd  ] umov\t%w0, %1.<v>[0]
+-    [r, w   ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1 /*foo */
++    [r, w   ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1
+     [w, r Z ; neon_from_gp<q>, simd  ] dup\t%0.<Vallxd>, %w1
+     [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
+     [w, w   ; neon_dup       , simd  ] dup\t%<Vetype>0, %1.<v>[0]
+--
+2.33.0
+
_service:tar_scm:0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch
Added
@@ -0,0 +1,123 @@ +From 8311e0053c8a9646b8798c53ae4a8f45d12c42c1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 28 Jul 2024 17:02:49 +0800 +Subject: PATCH 178/188 LoongArch: Relax ins_zero_bitmask_operand and remove + and<mode>3_align + +In r15-1207 I was too stupid to realize we just need to relax +ins_zero_bitmask_operand to allow using bstrins for aligning, instead of +adding a new split. And, "> 12" in ins_zero_bitmask_operand also makes +no sense: it rejects bstrins for things like "x & ~4l" with no good +reason. + +So fix my errors now. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Cover more cases that bstrins can benefit. + (high_bitmask_operand): Remove. + * config/loongarch/constraints.md (Yy): Remove. + * config/loongarch/loongarch.md (and<mode>3_align): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-4.c: New test. +--- + gcc/config/loongarch/constraints.md | 4 ---- + gcc/config/loongarch/loongarch.md | 17 ----------------- + gcc/config/loongarch/predicates.md | 9 ++------- + gcc/testsuite/gcc.target/loongarch/bstrins-4.c | 9 +++++++++ + 4 files changed, 11 insertions(+), 28 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index f3f5776da..d653ea82a 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,10 +292,6 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + +-(define_constraint "Yy" +- "@internal" +- (match_operand 0 "high_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6915dab0e..1ebcfa0c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1588,23 +1588,6 @@ + (set_attr "move_type" "pick_ins") + (set_attr "mode" "<MODE>")) + +-(define_insn_and_split "and<mode>3_align" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "high_bitmask_operand" "Yy"))) +- "" +- "#" +- "" +- (set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) +- (const_int 0)) +-{ +- int len; +- +- len = low_bitmask_len (<MODE>mode, ~INTVAL (operands2)); +- operands2 = GEN_INT (len); +-}) +- + (define_insn_and_split "*bstrins_<mode>_for_mask" + (set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 95be8a4fe..2b7f7ed47 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,10 +293,6 @@ + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "high_bitmask_operand" +- (and (match_code "const_int") +- (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) +- + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +@@ -406,11 +402,10 @@ + + (define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) != -1") +- (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ +- > 12"))) ++ > 0") ++ (not (match_operand 0 
"const_uns_arith_operand")))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +new file mode 100644 +index 000000000..0823cfc38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */ ++ ++long ++x (long a) ++{ ++ return a & ~4; ++} +-- +2.43.0 +
_service:tar_scm:0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
Added
@@ -0,0 +1,1552 @@ +From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: PATCH 080/157 BackportSME aarch64: Distinguish + streaming-compatible AdvSIMD insns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac + +The vast majority of Advanced SIMD instructions are not +available in streaming mode, but some of the load/store/move +instructions are. This patch adds a new target feature macro +called TARGET_BASE_SIMD for this streaming-compatible subset. + +The vector-to-vector move instructions are not streaming-compatible, +so we need to use the SVE move instructions where enabled, or fall +back to the nofp16 handling otherwise. + +I haven't found a good way of testing the SVE EXT alternative +in aarch64_simd_mov_from_<mode>high, but I'd rather provide it +than not. + +gcc/ + * config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro. + (TARGET_SIMD): Require PSTATE.SM to be 0. + (AARCH64_ISA_SM_OFF): New macro. + * config/aarch64/aarch64.cc (aarch64_array_mode_supported_p): + Allow Advanced SIMD structure modes for TARGET_BASE_SIMD. + (aarch64_print_operand): Support '%Z'. + (aarch64_secondary_reload): Expect SVE moves to be used for + Advanced SIMD modes if SVE is enabled and non-streaming + Advanced SIMD isn't. + (aarch64_register_move_cost): Likewise. + (aarch64_simd_container_mode): Extend Advanced SIMD mode + handling to TARGET_BASE_SIMD. + (aarch64_expand_cpymem): Expand commentary. + * config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd. + (arch_enabled): Handle it. + (*mov<mode>_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD. + (*movti_aarch64): Use an SVE move instruction if non-streaming + SIMD isn't available. + (*mov<TFD:mode>_aarch64): Likewise. + (load_pair_dw_tftf): Extend to TARGET_BASE_SIMD. + (store_pair_dw_tftf): Likewise. + (loadwb_pair<TX:mode>_<P:mode>): Likewise. + (storewb_pair<TX:mode>_<P:mode>): Likewise. + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Allow UMOV in streaming mode. + (*aarch64_simd_mov<VQMOV:mode>): Use an SVE move instruction + if non-streaming SIMD isn't available. + (aarch64_store_lane0<mode>): Depend on TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_simd_mov_from_<mode>low): Likewise. Use fmov if + Advanced SIMD is completely disabled. + (aarch64_simd_mov_from_<mode>high): Use SVE EXT instructions if + non-streaming SIMD isn't available. + +gcc/testsuite/ + * gcc.target/aarch64/movdf_2.c: New test. + * gcc.target/aarch64/movdi_3.c: Likewise. + * gcc.target/aarch64/movhf_2.c: Likewise. + * gcc.target/aarch64/movhi_2.c: Likewise. + * gcc.target/aarch64/movqi_2.c: Likewise. + * gcc.target/aarch64/movsf_2.c: Likewise. + * gcc.target/aarch64/movsi_2.c: Likewise. + * gcc.target/aarch64/movtf_3.c: Likewise. + * gcc.target/aarch64/movtf_4.c: Likewise. + * gcc.target/aarch64/movti_3.c: Likewise. + * gcc.target/aarch64/movti_4.c: Likewise. + * gcc.target/aarch64/movv16qi_4.c: Likewise. + * gcc.target/aarch64/movv16qi_5.c: Likewise. + * gcc.target/aarch64/movv8qi_4.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_1.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_2.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_3.c: Likewise. 
+--- + gcc/config/aarch64/aarch64-simd.md | 50 ++++++----- + gcc/config/aarch64/aarch64.cc | 16 ++-- + gcc/config/aarch64/aarch64.h | 12 ++- + gcc/config/aarch64/aarch64.md | 77 +++++++++-------- + gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++ + .../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++ + .../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++ + .../gcc.target/aarch64/sme/arm_neon_3.c | 11 +++ + 21 files changed, 1062 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 1f4b30642..62493cdfa 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -121,19 +121,19 @@ + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + {@ cons: =0, 1; attrs: type, arch +- w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 +- r , m ; load_8 , * ldr\t%x0, %1 +- m , Dz; store_8 , * str\txzr, %0 +- m , w ; neon_store1_1reg<q>, * str\t%d1, %0 +- m , r ; store_8 , * str\t%x1, %0 +- w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> +- w , w ; neon_logic<q> , * fmov\t%d0, %d1 +- ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 +- ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 +- ?w, r ; f_mcr , * fmov\t%d0, %1 +- ?r, r ; mov_reg , * mov\t%0, %1 +- w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); +- w , Dz; f_mcr , * fmov\t%d0, xzr ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; 
neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , base_simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr + } + ) + +@@ -148,6 +148,7 @@ + Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 + m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 + w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; * , sve , 4 mov\t%Z0.d, %Z1.d + ?r , w ; multiple , * , 8 # + ?w , r ; multiple , * , 8 # + ?r , r ; multiple , * , 8 # +@@ -177,7 +178,7 @@ + (set (match_operand:<VEL> 0 "memory_operand" "=m") + (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") + (parallel (match_operand 2 "const_int_operand" "n")))) +- "TARGET_SIMD ++ "TARGET_FLOAT + && ENDIAN_LANE_N (<nunits>, INTVAL (operands2)) == 0" + "str\\t%<Vetype>1, %0" + (set_attr "type" "neon_store1_1reg<q>") +@@ -312,35 +313,38 @@ + ) + + (define_insn_and_split "aarch64_simd_mov_from_<mode>low" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r") ++ (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" ""))) +- "TARGET_SIMD" ++ "TARGET_FLOAT" + "@ + # +- umov\t%0, %1.d0" ++ umov\t%0, %1.d0 ++ fmov\t%0, %d1" + "&& reload_completed && aarch64_simd_register (operands0, <VHALF>mode)" + (set (match_dup 0) (match_dup 1)) + { + operands1 = aarch64_replace_reg_mode (operands1, <VHALF>mode); + } +- (set_attr "type" "mov_reg,neon_to_gp<q>") ++ (set_attr "type" "mov_reg,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,base_simd,*") + (set_attr "length" "4") + ) + + (define_insn "aarch64_simd_mov_from_<mode>high" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
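For background on the base_simd/simd split above: inside a streaming-compatible function only the load/store/move subset of Advanced SIMD is legal, so the compiler must pick the base-SIMD or GPR alternatives added here. A heavily hedged sketch using the ACLE keyword from this backport series (whether this exact form compiles depends on how much of the SME series is applied):

/* __arm_streaming_compatible marks a function callable with PSTATE.SM
   either on or off; plain Q-register ldr/str remain legal there, while
   most other Advanced SIMD instructions do not.  Assumption: the SME
   ACLE keyword support from this patch series is enabled.  */
typedef unsigned char v16qi __attribute__ ((vector_size (16)));

void
copy_v16qi (v16qi *dst, v16qi *src) __arm_streaming_compatible
{
  *dst = *src;  /* Expected (hedged): ldr q<N>, [x1]; str q<N>, [x0].  */
}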
_service:tar_scm:0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch
Added
@@ -0,0 +1,224 @@ +From 54bf8fc616af5cdb9e4c787a2dfb2c516c8e425a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 28 Jul 2024 19:57:02 +0800 +Subject: PATCH 179/188 LoongArch: Rework bswap{hi,si,di}2 definition + +Per a gcc-help thread we are generating sub-optimal code for +__builtin_bswap{32,64}. To fix it: + +- Use a single revb.d instruction for bswapdi2. +- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT, + revb.2h + rotri.w for !TARGET_64BIT. +- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single + revb.2w instruction for bswapdi2 (x) r>> 32. + +Unfortunately I cannot figure out a way to make the compiler generate +revb.4h or revh.{2w,d} instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H, + UNSPEC_REVH_D): Remove UNSPECs. + (revb_4h, revh_d): Remove define_insn. + (revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of + an UNSPEC. + (revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn. + (bswapsi2): Change to define_expand. Only expand to revb.2h + + rotri.w if !TARGET_64BIT. + (bswapdi2): Change to define_insn of which the output is just a + revb.d instruction. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/revb.c: New test. +--- + gcc/config/loongarch/loongarch.md | 79 ++++++++++++----------- + gcc/testsuite/gcc.target/loongarch/revb.c | 61 +++++++++++++++++ + 2 files changed, 104 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 1ebcfa0c7..b1c828dba 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -20,11 +20,6 @@ + ;; <http://www.gnu.org/licenses/>. + + (define_c_enum "unspec" +- ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_REVB_2H +- UNSPEC_REVB_4H +- UNSPEC_REVH_D +- + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH +@@ -3151,55 +3146,67 @@ + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
+ +-(define_insn "bswaphi2" +- (set (match_operand:HI 0 "register_operand" "=r") +- (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++(define_insn "revb_2h" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16))) + "" + "revb.2h\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn_and_split "bswapsi2" +- (set (match_operand:SI 0 "register_operand" "=r") +- (bswap:SI (match_operand:SI 1 "register_operand" "r"))) +- "" +- "#" +- "" +- (set (match_dup 0) (unspec:SI (match_dup 1) UNSPEC_REVB_2H)) +- (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16))) +- "" +- (set_attr "insn_count" "2")) +- +-(define_insn_and_split "bswapdi2" ++(define_insn "revb_2h_extend" + (set (match_operand:DI 0 "register_operand" "=r") +- (bswap:DI (match_operand:DI 1 "register_operand" "r"))) ++ (sign_extend:DI ++ (rotatert:SI ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16)))) + "TARGET_64BIT" +- "#" +- "" +- (set (match_dup 0) (unspec:DI (match_dup 1) UNSPEC_REVB_4H)) +- (set (match_dup 0) (unspec:DI (match_dup 0) UNSPEC_REVH_D)) +- "" +- (set_attr "insn_count" "2")) ++ "revb.2h\t%0,%1" ++ (set_attr "type" "shift")) + +-(define_insn "revb_2h" +- (set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI (match_operand:SI 1 "register_operand" "r") UNSPEC_REVB_2H)) ++(define_insn "bswaphi2" ++ (set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) + "" + "revb.2h\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn "revb_4h" ++(define_insn "revb_2w" + (set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI (match_operand:DI 1 "register_operand" "r") UNSPEC_REVB_4H)) ++ (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r")) ++ (const_int 32))) + "TARGET_64BIT" +- "revb.4h\t%0,%1" ++ "revb.2w\t%0,%1" + (set_attr "type" "shift")) + +-(define_insn "revh_d" ++(define_insn "*bswapsi2" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r"))) ++ "TARGET_64BIT" ++ "revb.2w\t%0,%1" ++ (set_attr "type" "shift")) ++ ++(define_expand "bswapsi2" ++ (set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r"))) ++ "" ++{ ++ if (!TARGET_64BIT) ++ { ++ rtx t = gen_reg_rtx (SImode); ++ emit_insn (gen_revb_2h (t, operands1)); ++ emit_insn (gen_rotrsi3 (operands0, t, GEN_INT (16))); ++ DONE; ++ } ++}) ++ ++(define_insn "bswapdi2" + (set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI (match_operand:DI 1 "register_operand" "r") UNSPEC_REVH_D)) ++ (bswap:DI (match_operand:DI 1 "register_operand" "r"))) + "TARGET_64BIT" +- "revh.d\t%0,%1" ++ "revb.d\t%0,%1" + (set_attr "type" "shift")) ++ +  + ;; + ;; .................... 
+diff --git a/gcc/testsuite/gcc.target/loongarch/revb.c b/gcc/testsuite/gcc.target/loongarch/revb.c +new file mode 100644 +index 000000000..27a5d0fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/revb.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t1: ++** revb.2w \$r4,\$r4 ++** slli.w \$r4,\$r4,0 ++** jr \$r1 ++*/ ++unsigned int ++t1 (unsigned int x) ++{ ++ return __builtin_bswap32 (x); ++} ++ ++/* ++**t2: ++** revb.d \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t2 (unsigned long x) ++{ ++ return __builtin_bswap64 (x); ++} ++ ++/* ++**t3: ++** revb.2h \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned int ++t3 (unsigned int x) ++{ ++ return (x >> 8) & 0xff00ff | (x << 8) & 0xff00ff00; ++} ++ ++/*
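A hedged illustration of the last mapping listed above, "bswapdi2 (x) r>> 32" collapsing to a single revb.2w (the assembly follows the commit message and the new revb.c test rather than a fresh verification):

/* A 64-bit byte swap followed by a 32-bit rotate matches the new revb_2w
   pattern, so the whole function should become:
       revb.2w  $r4,$r4
       jr       $r1                                                      */
unsigned long
bswap_rot32 (unsigned long x)
{
  unsigned long y = __builtin_bswap64 (x);
  return (y >> 32) | (y << 32);
}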
_service:tar_scm:0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
Added
@@ -0,0 +1,4506 @@ +From 0404dfa43633a35460aba1b96d04f62cc7d6103b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: PATCH 081/157 BackportSME aarch64: Mark relevant SVE + instructions as non-streaming + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=983b4365028e9a059b5fb1eef85a297bea19fc8e + +Following on from the previous Advanced SIMD patch, this one +divides SVE instructions into non-streaming and streaming- +compatible groups. + +gcc/ + * config/aarch64/aarch64.h (TARGET_NON_STREAMING): New macro. + (TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Use it. + (TARGET_SVE2_SHA3, TARGET_SVE2_SM4): Likewise. + * config/aarch64/aarch64-sve-builtins-base.def: Separate out + the functions that require PSTATE.SM to be 0 and guard them + with AARCH64_FL_SM_OFF. + * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. + * config/aarch64/aarch64-sve-builtins.cc (check_required_extensions): + Enforce AARCH64_FL_SM_OFF requirements. + * config/aarch64/aarch64-sve.md (aarch64_wrffr): Require + TARGET_NON_STREAMING + (aarch64_rdffr, aarch64_rdffr_z, *aarch64_rdffr_z_ptest): Likewise. + (*aarch64_rdffr_ptest, *aarch64_rdffr_z_cc, *aarch64_rdffr_cc) + (@aarch64_ld<fn>f1<mode>): Likewise. + (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>) + (gather_load<mode><v_int_container>): Likewise + (mask_gather_load<mode><v_int_container>): Likewise. + (mask_gather_load<mode><v_int_container>): Likewise. + (*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked): Likewise. + (*mask_gather_load<mode><v_int_container>_sxtw): Likewise. + (*mask_gather_load<mode><v_int_container>_uxtw): Likewise. + (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>) + (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>): Likewise. + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked) + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_sxtw): Likewise. + (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> + <SVE_2BHSI:mode>_uxtw): Likewise. + (@aarch64_ldff1_gather<mode>, @aarch64_ldff1_gather<mode>): Likewise. + (*aarch64_ldff1_gather<mode>_sxtw): Likewise. + (*aarch64_ldff1_gather<mode>_uxtw): Likewise. + (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode> + <VNx4_NARROW:mode>): Likewise. + (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>): Likewise. + (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>_sxtw): Likewise. + (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> + <VNx2_NARROW:mode>_uxtw): Likewise. + (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>) + (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>) + (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw) + (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw) + (scatter_store<mode><v_int_container>): Likewise. + (mask_scatter_store<mode><v_int_container>): Likewise. + (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked) + (*mask_scatter_store<mode><v_int_container>_sxtw): Likewise. + (*mask_scatter_store<mode><v_int_container>_uxtw): Likewise. 
+ (@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>) + (@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw) + (@aarch64_sve_ld1ro<mode>, @aarch64_adr<mode>): Likewise. + (*aarch64_adr_sxtw, *aarch64_adr_uxtw_unspec): Likewise. + (*aarch64_adr_uxtw_and, @aarch64_adr<mode>_shift): Likewise. + (*aarch64_adr<mode>_shift, *aarch64_adr_shift_sxtw): Likewise. + (*aarch64_adr_shift_uxtw, @aarch64_sve_add_<optab><vsi2qi>): Likewise. + (@aarch64_sve_<sve_fp_op><mode>, fold_left_plus_<mode>): Likewise. + (mask_fold_left_plus_<mode>, @aarch64_sve_compact<mode>): Likewise. + * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>) + (@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode> + <SVE_PARTIAL_I:mode>): Likewise. + (@aarch64_sve2_histcnt<mode>, @aarch64_sve2_histseg<mode>): Likewise. + (@aarch64_pred_<SVE2_MATCH:sve_int_op><mode>): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_cc): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_ptest): Likewise. + * config/aarch64/iterators.md (SVE_FP_UNARY_INT): Make FEXPA + depend on TARGET_NON_STREAMING. + (SVE_BFLOAT_TERNARY_LONG): Likewise BFMMLA. + +gcc/testsuite/ + * g++.target/aarch64/sve/aarch64-ssve.exp: New harness. + * g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Add + -DSTREAMING_COMPATIBLE to the list of options. + * g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + Fix pasto in variable name. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Mark functions + as streaming-compatible if STREAMING_COMPATIBLE is defined. + * gcc.target/aarch64/sve/acle/asm/adda_f16.c: Disable for + streaming-compatible code. + * gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c: Likewise.
_service:tar_scm:0180-testsuite-fix-dg-do-preprocess-typo.patch
Added
@@ -0,0 +1,26 @@
+From 35d804730dcac1b3e96db4b587de0cd77fec3504 Mon Sep 17 00:00:00 2001
+From: Sam James <sam@gentoo.org>
+Date: Tue, 30 Jul 2024 21:46:29 +0100
+Subject: [PATCH 180/188] testsuite: fix 'dg-do-preprocess' typo
+
+We want 'dg-do preprocess', not 'dg-do-preprocess'.  Fix that.
+
+    PR target/106828
+    * g++.target/loongarch/pr106828.C: Fix 'dg-do compile' typo.
+---
+ gcc/testsuite/g++.target/loongarch/pr106828.C | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
+index 190c1db71..0d13cbbd5 100644
+--- a/gcc/testsuite/g++.target/loongarch/pr106828.C
++++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
+@@ -1,4 +1,4 @@
+-/* { dg-do-preprocess } */
++/* { dg-do preprocess } */
+ /* { dg-options "-mabi=lp64d -fsanitize=address" } */
+
+ /* Tests whether the compiler supports compile option '-fsanitize=address'. */
+--
+2.43.0
+
_service:tar_scm:0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
Added
@@ -0,0 +1,250 @@ +From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 12 Dec 2022 15:18:56 +0000 +Subject: PATCH 082/157 BackportSME AArch64: Support new tbranch optab. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11 + +This implements the new tbranch optab for AArch64. + +we cannot emit one big RTL for the final instruction immediately. +The reason that all comparisons in the AArch64 backend expand to separate CC +compares, and separate testing of the operands is for ifcvt. + +The separate CC compare is needed so ifcvt can produce csel, cset etc from the +compares. Unlike say combine, ifcvt can not do recog on a parallel with a +clobber. Should we emit the instruction directly then ifcvt will not be able +to say, make a csel, because we have no patterns which handle zero_extract and +compare. (unlike combine ifcvt cannot transform the extract into an AND). + +While you could provide various patterns for this (and I did try) you end up +with broken patterns because you can't add the clobber to the CC register. If +you do, ifcvt recog fails. + +i.e. + +int +f1 (int x) +{ + if (x & 1) + return 1; + return x; +} + +We lose csel here. + +Secondly the reason the compare with an explicit CC mode is needed is so that +ifcvt can transform the operation into a version that doesn't require the flags +to be set. But it only does so if it know the explicit usage of the CC reg. + +For instance + +int +foo (int a, int b) +{ + return ((a & (1 << 25)) ? 5 : 4); +} + +Doesn't require a comparison, the optimal form is: + +foo(int, int): + ubfx x0, x0, 25, 1 + add w0, w0, 4 + ret + +and no compare is actually needed. If you represent the instruction using an +ANDS instead of a zero_extract then you get close, but you end up with an ands +followed by an add, which is a slower operation. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to... + (*tb<optab><ALLI:mode><GPI:mode>1): ... this. + (tbranch_<code><mode>4): New. + * config/aarch64/iterators.md(ZEROM, zerom): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/tbz_1.c: New test. 
+--- + gcc/config/aarch64/aarch64.md | 33 ++++++-- + gcc/config/aarch64/iterators.md | 2 + + gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++ + 3 files changed, 122 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 079c8a3f9..2becc888e 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -953,12 +953,29 @@ + (const_int 1))) + ) + +-(define_insn "*tb<optab><mode>1" ++(define_expand "tbranch_<code><mode>3" + (set (pc) (if_then_else +- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r") +- (const_int 1) +- (match_operand 1 +- "aarch64_simd_shift_imm_<mode>" "n")) ++ (EQL (match_operand:ALLI 0 "register_operand") ++ (match_operand 1 "aarch64_simd_shift_imm_<mode>")) ++ (label_ref (match_operand 2 "")) ++ (pc))) ++ "" ++{ ++ rtx bitvalue = gen_reg_rtx (<ZEROM>mode); ++ rtx reg = gen_lowpart (<ZEROM>mode, operands0); ++ rtx val = GEN_INT (1UL << UINTVAL (operands1)); ++ emit_insn (gen_and<zerom>3 (bitvalue, reg, val)); ++ operands1 = const0_rtx; ++ operands0 = aarch64_gen_compare_reg (<CODE>, bitvalue, ++ operands1); ++}) ++ ++(define_insn "*tb<optab><ALLI:mode><GPI:mode>1" ++ (set (pc) (if_then_else ++ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r") ++ (const_int 1) ++ (match_operand 1 ++ "aarch64_simd_shift_imm_<ALLI:mode>" "n")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) +@@ -969,15 +986,15 @@ + { + if (get_attr_far_branch (insn) == 1) + return aarch64_gen_far_branch (operands, 2, "Ltb", +- "<inv_tb>\\t%<w>0, %1, "); ++ "<inv_tb>\\t%<ALLI:w>0, %1, "); + else + { + operands1 = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands1)); +- return "tst\t%<w>0, %1\;<bcond>\t%l2"; ++ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2"; + } + } + else +- return "<tbz>\t%<w>0, %1, %l2"; ++ return "<tbz>\t%<ALLI:w>0, %1, %l2"; + } + (set_attr "type" "branch") + (set (attr "length") +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 226dea48a..b616f5c9a 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -1104,6 +1104,8 @@ + + ;; Give the number of bits in the mode + (define_mode_attr sizen (QI "8") (HI "16") (SI "32") (DI "64")) ++(define_mode_attr ZEROM (QI "SI") (HI "SI") (SI "SI") (DI "DI")) ++(define_mode_attr zerom (QI "si") (HI "si") (SI "si") (DI "di")) + + ;; Give the ordinal of the MSB in the mode + (define_mode_attr sizem1 (QI "#7") (HI "#15") (SI "#31") (DI "#63") +diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c +new file mode 100644 +index 000000000..39deb58e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c +@@ -0,0 +1,95 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */ ++/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ ++ ++#include <stdbool.h> ++ ++void h(void); ++ ++/* ++** g1: ++** tbnz w0-9+, #?0, .L(0-9+) ++** ret ++** ... ++*/ ++void g1(bool x) ++{ ++ if (__builtin_expect (x, 0)) ++ h (); ++} ++ ++/* ++** g2: ++** tbz w0-9+, #?0, .L(0-9+) ++** b h ++** ... ++*/ ++void g2(bool x) ++{ ++ if (__builtin_expect (x, 1)) ++ h (); ++} ++ ++/* ++** g3_ge: ++** tbnz w0-9+, #?31, .L0-9+ ++** b h ++** ... ++*/ ++void g3_ge(int x) ++{ ++ if (__builtin_expect (x >= 0, 1)) ++ h (); ++} ++ ++/* ++** g3_gt: ++** cmp w0-9+, 0
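To round out the examples in the commit message above, the case the tbranch optab is designed to catch is a conditional branch on a single bit; a hedged sketch (expected assembly mirrors the new tbz_1.c test, not re-verified):

/* With the tbranch optab wired up, a single-bit test can branch directly
   with tbz/tbnz instead of going through a separate ubfx/tst; testing
   bit 25 should give something like:
       tbnz  w0, 25, .L<n>
       ret                                                               */
void h (void);

void
branch_on_bit25 (unsigned int x)
{
  if (x & (1u << 25))
    h ();
}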
_service:tar_scm:0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch
Added
@@ -0,0 +1,47 @@
+From 643248a4c60c016af44bc740b35c7ac174849029 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Tue, 23 Jul 2024 10:04:26 +0800
+Subject: [PATCH 181/188] LoongArch: Remove gawk extension from a generator
+ script.
+
+gcc/ChangeLog:
+
+    * config/loongarch/genopts/gen-evolution.awk: Do not use
+    "length()" to compute the size of an array.
+---
+ gcc/config/loongarch/genopts/gen-evolution.awk | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk
+index 4d105afa9..1c8004e41 100644
+--- a/gcc/config/loongarch/genopts/gen-evolution.awk
++++ b/gcc/config/loongarch/genopts/gen-evolution.awk
+@@ -1,4 +1,4 @@
+-#!/usr/bin/gawk
++#!/usr/bin/awk -f
+ #
+ # A simple script that generates loongarch-evolution.h
+ # from genopts/isa-evolution.in
+@@ -94,8 +94,9 @@ function gen_cpucfg_useful_idx()
+     idx_bucket[cpucfg_word[i]] = 1
+
+   delete idx_list
++  j = 1
+   for (i in idx_bucket)
+-    idx_list[length(idx_list)-1] = i+0
++    idx_list[j++] = i+0
+   delete idx_bucket
+
+   asort (idx_list)
+@@ -108,7 +109,7 @@ function gen_cpucfg_useful_idx()
+   print ""
+
+   printf ("static constexpr int N_CPUCFG_WORDS = %d;\n",
+-          idx_list[length(idx_list)] + 1)
++          idx_list[j - 1] + 1)
+
+   delete idx_list
+ }
+--
+2.43.0
+
_service:tar_scm:0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
Added
@@ -0,0 +1,381 @@ +From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:04 +0100 +Subject: PATCH 083/157 BackportSME aarch64: Use local frame vars in + shrink-wrapping code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd + +aarch64_layout_frame uses a shorthand for referring to +cfun->machine->frame: + + aarch64_frame &frame = cfun->machine->frame; + +This patch does the same for some other heavy users of the structure. +No functional change intended. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use + a local shorthand for cfun->machine->frame. + (aarch64_restore_callee_saves, aarch64_get_separate_components): + (aarch64_process_components): Likewise. + (aarch64_allocate_and_probe_stack_space): Likewise. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_layout_frame): Use existing shorthand for one more case. +--- + gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- + 1 file changed, 64 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 08a98f8ba..b7da1d0be 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void) + frame.is_scs_enabled + = (!crtl->calls_eh_return + && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) +- && known_ge (cfun->machine->frame.reg_offsetLR_REGNUM, 0)); ++ && known_ge (frame.reg_offsetLR_REGNUM, 0)); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, and we don't need to pop x30 again in the traditional +@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx_insn *insn; + unsigned regno; + unsigned regno2; +@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + + if (skip_wb +- && (regno == cfun->machine->frame.wb_push_candidate1 +- || regno == cfun->machine->frame.wb_push_candidate2)) ++ && (regno == frame.wb_push_candidate1 ++ || regno == frame.wb_push_candidate2)) + continue; + + if (cfun->machine->reg_is_wrapped_separatelyregno) +@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offsetregno; ++ offset = start_offset + frame.reg_offsetregno; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9399,7 +9400,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + { + gcc_assert (known_eq (start_offset, 0)); + poly_int64 fp_offset +- = cfun->machine->frame.below_hard_fp_saved_regs_size; ++ = frame.below_hard_fp_saved_regs_size; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separatelyregno2 + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offsetregno2 +- - cfun->machine->frame.reg_offsetregno)) ++ frame.reg_offsetregno2 - frame.reg_offsetregno)) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -9472,6 +9472,7 @@ static void + 
aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { ++ aarch64_frame &frame = cfun->machine->frame; + unsigned regno; + unsigned regno2; + poly_int64 offset; +@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + rtx reg, mem; + + if (skip_wb +- && (regno == cfun->machine->frame.wb_pop_candidate1 +- || regno == cfun->machine->frame.wb_pop_candidate2)) ++ && (regno == frame.wb_pop_candidate1 ++ || regno == frame.wb_pop_candidate2)) + continue; + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offset[regno]; ++ offset = start_offset + frame.reg_offset[regno]; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separately[regno2] + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offset[regno2] +- - cfun->machine->frame.reg_offset[regno])) ++ frame.reg_offset[regno2] - frame.reg_offset[regno])) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) + static sbitmap + aarch64_get_separate_components (void) + { ++ aarch64_frame &frame = cfun->machine->frame; + sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); + bitmap_clear (components); + +@@ -9627,18 +9628,18 @@ + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + continue; + +- poly_int64 offset = cfun->machine->frame.reg_offset[regno]; ++ poly_int64 offset = frame.reg_offset[regno]; + + /* If the register is saved in the first SVE save slot, we use + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection +- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) ++ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) + && known_eq (offset, 0)) + continue; + + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9657,11 +9658,11 @@ + /* If the spare predicate register used by big-endian SVE code + is call-preserved, it must be saved in the main prologue + before any saves that use it. */ +- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) +- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); ++ if (frame.spare_pred_reg != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.spare_pred_reg); + +- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; ++ unsigned reg1 = frame.wb_push_candidate1; ++ unsigned reg2 = frame.wb_push_candidate2; + /* If registers have been chosen to be stored/restored with + writeback don't interfere with them to avoid having to output explicit + stack adjustment instructions. 
*/ +@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) + static void + aarch64_process_components (sbitmap components, bool prologue_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM); +@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) + machine_mode mode = aarch64_reg_save_mode (regno); + + rtx reg = gen_rtx_REG (mode, regno); +- poly_int64 offset = cfun->machine->frame.reg_offset[regno]; ++ poly_int64 offset = frame.reg_offset[regno]; + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) + break; + } + +- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; ++ poly_int64 offset2 = frame.reg_offset[regno2]; + /* The next register is not of the same class or its offset is not + mergeable with the current one into a pair. */ + if (aarch64_sve_mode_p (mode) + || !satisfies_constraint_Ump (mem)
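The refactoring this patch applies everywhere is the classic "bind a reference once" cleanup. A minimal self-contained C++ sketch of the pattern, using hypothetical stand-in types rather than GCC's real ones:

    #include <cstdio>

    struct frame_info { long reg_offset[32]; };      // stand-in for aarch64_frame
    struct machine_info { frame_info frame; };
    struct function_info { machine_info *machine; };
    static function_info *cfun_like;                 // stand-in for cfun

    static long sum_offsets ()
    {
      frame_info &frame = cfun_like->machine->frame; // bind once, as in the patch
      long s = 0;
      for (int r = 0; r < 32; ++r)
        s += frame.reg_offset[r];   // was cfun_like->machine->frame.reg_offset[r]
      return s;
    }

    int main ()
    {
      machine_info m {};
      function_info f = { &m };
      cfun_like = &f;
      m.frame.reg_offset[3] = 16;
      std::printf ("%ld\n", sum_offsets ());         // prints 16
    }

Because frame is a reference, every access still reads the live structure, which is why the commit can promise no functional change.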
_service:tar_scm:0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch
Added
@@ -0,0 +1,226 @@ +From 64560e75b4d020b6c47e07592595ceed663541af Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 1 Aug 2024 16:07:25 +0800 +Subject: PATCH 182/188 LoongArch: Use iorn and andn standard pattern names. + +R15-1890 introduced new optabs iorc and andc, and its corresponding +internal functions BIT_{ANDC,IORC}, and if targets defines such optabs +for vector modes. And in r15-2258 the iorc and andc were renamed to +iorn and andn. +So we changed the andn and iorn implementation templates to the standard +template names. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xvandn<mode>3): Rename to ... + (andn<mode>3): This. + (xvorn<mode>3): Rename to ... + (iorn<mode>3): This. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vandn_v): Defined as the modified name. + (CODE_FOR_lsx_vorn_v): Likewise. + (CODE_FOR_lasx_xvandn_v): Likewise. + (CODE_FOR_lasx_xvorn_v): Likewise. + (loongarch_expand_builtin_insn): When the builtin function to be + called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the + two operands. + * config/loongarch/loongarch.md (<optab>n<mode>): Rename to ... + (<optab>n<mode>3): This. + * config/loongarch/lsx.md (vandn<mode>3): Rename to ... + (andn<mode>3): This. + (vorn<mode>3): Rename to ... + (iorn<mode>3): This. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-andn-iorn.c: New test. + * gcc.target/loongarch/lsx-andn-iorn.c: New test. +--- + gcc/config/loongarch/lasx.md | 10 +++---- + gcc/config/loongarch/loongarch-builtins.cc | 10 ++++--- + gcc/config/loongarch/loongarch.md | 8 +++--- + gcc/config/loongarch/lsx.md | 10 +++---- + .../gcc.target/loongarch/lasx-andn-iorn.c | 11 ++++++++ + .../gcc.target/loongarch/lsx-andn-iorn.c | 28 +++++++++++++++++++ + 6 files changed, 59 insertions(+), 18 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 44a7d58ff..3775155ca 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -2716,12 +2716,12 @@ + (set_attr "mode" "V4DI")) + + ;; Extend loongson-sx to loongson-asx. 
+-(define_insn "xvandn<mode>3" ++(define_insn "andn<mode>3" + (set (match_operand:LASX 0 "register_operand" "=f") +- (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f")) +- (match_operand:LASX 2 "register_operand" "f"))) ++ (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f")) ++ (match_operand:LASX 1 "register_operand" "f"))) + "ISA_HAS_LASX" +- "xvandn.v\t%u0,%u1,%u2" ++ "xvandn.v\t%u0,%u2,%u1" + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +@@ -4637,7 +4637,7 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + +-(define_insn "xvorn<mode>3" ++(define_insn "iorn<mode>3" + (set (match_operand:ILASX 0 "register_operand" "=f") + (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f")) + (match_operand:ILASX 1 "register_operand" "f"))) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 51abba007..f9ff85d2e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d +-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3 ++#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3 ++#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3 + #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2 + #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2 + #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2 +@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si + #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di + +-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3 +-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3 ++#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3 ++#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3 + #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2 + #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2 + #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2 +@@ -2853,6 +2853,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lsx_vpickod_b: + case CODE_FOR_lsx_vpickod_h: + case CODE_FOR_lsx_vpickod_w: ++ case CODE_FOR_lsx_vandn_v: + case CODE_FOR_lasx_xvilvh_b: + case CODE_FOR_lasx_xvilvh_h: + case CODE_FOR_lasx_xvilvh_w: +@@ -2873,6 +2874,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lasx_xvpickod_b: + case CODE_FOR_lasx_xvpickod_h: + case CODE_FOR_lasx_xvpickod_w: ++ case CODE_FOR_lasx_xvandn_v: + /* Swap the operands 1 and 2 for interleave operations. Built-ins follow + convention of ISA, which have op1 as higher component and op2 as lower + component. 
However, the VEC_PERM op in tree and vec_concat in RTL +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b1c828dba..58c8f28ed 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1701,13 +1701,13 @@ + (set_attr "type" "logical") + (set_attr "mode" "SI")) + +-(define_insn "<optab>n<mode>" ++(define_insn "<optab>n<mode>3" + (set (match_operand:X 0 "register_operand" "=r") + (neg_bitwise:X +- (not:X (match_operand:X 1 "register_operand" "r")) +- (match_operand:X 2 "register_operand" "r"))) ++ (not:X (match_operand:X 2 "register_operand" "r")) ++ (match_operand:X 1 "register_operand" "r"))) + "" +- "<insn>n\t%0,%2,%1" ++ "<insn>n\t%0,%1,%2" + (set_attr "type" "logical") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 2eac11473..c7480aafd 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -2344,12 +2344,12 @@ + } + (set_attr "mode" "V4SF")) + +-(define_insn "vandn<mode>3" ++(define_insn "andn<mode>3" + (set (match_operand:LSX 0 "register_operand" "=f") +- (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f")) +- (match_operand:LSX 2 "register_operand" "f"))) ++ (and:LSX (not:LSX (match_operand:LSX 2 "register_operand" "f")) ++ (match_operand:LSX 1 "register_operand" "f"))) + "ISA_HAS_LSX" +- "vandn.v\t%w0,%w1,%w2" ++ "vandn.v\t%w0,%w2,%w1" + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +@@ -3028,7 +3028,7 @@ + (set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")) + +-(define_insn "vorn<mode>3" ++(define_insn "iorn<mode>3" + (set (match_operand:ILSX 0 "register_operand" "=f") + (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f")) + (match_operand:ILSX 1 "register_operand" "f"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +new file mode 100644 +index 000000000..4aa5f19a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +@@ -0,0 +1,11 @@ ++#define N 8 ++ ++#include "./lsx-andn-iorn.c" ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize" } */ ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. */ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +new file mode 100644 +index 000000000..7bceccd37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ftree-vectorize" } */ ++ ++#ifndef N
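For readers unfamiliar with the optab names: andn computes op1 & ~op2 and iorn computes op1 | ~op2, while LoongArch's [x]vandn.v negates its first source (~rj & rk); that mismatch is exactly why the templates above swap operands 1 and 2. A scalar C++ sketch of the equivalence (illustrative only, not the vector code GCC emits):

    #include <cstdint>
    #include <cstdio>

    // Standard optab semantics.
    static uint32_t andn (uint32_t op1, uint32_t op2) { return op1 & ~op2; }
    static uint32_t iorn (uint32_t op1, uint32_t op2) { return op1 | ~op2; }

    // What the vandn.v instruction itself computes: the FIRST source is negated.
    static uint32_t vandn_insn (uint32_t rj, uint32_t rk) { return ~rj & rk; }

    int main ()
    {
      uint32_t a = 0xF0F0F0F0u, b = 0x0FF00FF0u;
      // andn (a, b) == vandn_insn (b, a): the operand swap in the .md templates.
      std::printf ("%08x %08x %08x\n", (unsigned) andn (a, b),
                   (unsigned) vandn_insn (b, a), (unsigned) iorn (a, b));
    }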
_service:tar_scm:0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
Added
@@ -0,0 +1,103 @@ +From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:04 +0100 +Subject: PATCH BackportSME aarch64: Avoid a use of callee_offset + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b + +When we emit the frame chain, i.e. when we reach Here in this statement +of aarch64_expand_prologue: + + if (emit_frame_chain) + { + // Here + ... + } + +the stack is in one of two states: + +- We've allocated up to the frame chain, but no more. + +- We've allocated the whole frame, and the frame chain is within easy + reach of the new SP. + +The offset of the frame chain from the current SP is available +in aarch64_frame as callee_offset. It is also available as the +chain_offset local variable, where the latter is calculated from other +data. (However, chain_offset is not always equal to callee_offset when +!emit_frame_chain, so chain_offset isn't redundant.) + +In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using +chain_offset for the initialisation of the hard frame pointer: + + aarch64_add_offset (Pmode, hard_frame_pointer_rtx, +- stack_pointer_rtx, callee_offset, ++ stack_pointer_rtx, chain_offset, + tmp1_rtx, tmp0_rtx, frame_pointer_needed); + +But the later REG_CFA_ADJUST_CFA handling still used callee_offset. + +I think the difference is harmless, but it's more logical for the +CFA note to be in sync, and it's more convenient for later patches +if it uses chain_offset. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use + chain_offset rather than callee_offset. +--- + gcc/config/aarch64/aarch64.cc | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b7da1d0be..fbd7a079a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno) + current FP is also set up if it is in use. */ + + void + aarch64_expand_prologue (void) + { + aarch64_frame &frame = cfun->machine->frame; + poly_int64 frame_size = frame.frame_size; + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size + = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; + rtx_insn *insn; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) + { +@@ -10376,22 +10375,21 @@ aarch64_expand_prologue (void) + the CFA based on the frame pointer. We therefore need new + DW_CFA_expressions to re-express the save slots with addresses + based on the frame pointer. */ + rtx_insn *insn = get_last_insn (); + gcc_assert (RTX_FRAME_RELATED_P (insn)); + + /* Add an explicit CFA definition if this was previously + implicit. */ + if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) + { +- rtx src = plus_constant (Pmode, stack_pointer_rtx, +- callee_offset); ++ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (hard_frame_pointer_rtx, src)); + } + + /* Change the save slot expressions for the registers that + we've already saved. 
*/ + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], + hard_frame_pointer_rtx, UNITS_PER_WORD); + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], + hard_frame_pointer_rtx, 0); +-- +2.38.1.windows.1 +
_service:tar_scm:0183-LoongArch-Drop-vcond-u-expanders.patch
Added
@@ -0,0 +1,127 @@ +From 8394519779553a2c59214d76054dd1ba87a380b3 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 8 Aug 2024 10:39:54 +0800 +Subject: PATCH 183/188 LoongArch: Drop vcond{,u} expanders. + +Optabs vcond{,u} will be removed for GCC 15. Since regtest shows no +fallout, dropping the expanders, now. + +gcc/ChangeLog: + + PR target/114189 + * config/loongarch/lasx.md (vcondu<LASX:mode><ILASX:mode>): Delete. + (vcond<LASX:mode><LASX_2:mode>): Likewise. + * config/loongarch/lsx.md (vcondu<LSX:mode><ILSX:mode>): Likewise. + (vcond<LSX:mode><LSX_2:mode>): Likewise. +--- + gcc/config/loongarch/lasx.md | 37 ------------------------------------ + gcc/config/loongarch/lsx.md | 31 ------------------------------ + 2 files changed, 68 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3775155ca..be2f6ca8e 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -165,9 +165,6 @@ + ;; All vector modes with 256 bits. + (define_mode_iterator LASX V4DF V8SF V4DI V8SI V16HI V32QI) + +-;; Same as LASX. Used by vcond to iterate two modes. +-(define_mode_iterator LASX_2 V4DF V8SF V4DI V8SI V16HI V32QI) +- + ;; Only used for splitting insert_d and copy_{u,s}.d. + (define_mode_iterator LASX_D V4DI V4DF) + +@@ -762,40 +759,6 @@ + DONE; + }) + +-;; FIXME: 256?? +-(define_expand "vcondu<LASX:mode><ILASX:mode>" +- (match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:ILASX 4 "register_operand") +- (match_operand:ILASX 5 "register_operand")) +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (<LASX:MODE>mode) +- == GET_MODE_NUNITS (<ILASX:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode, +- operands); +- DONE; +-}) +- +-;; FIXME: 256?? +-(define_expand "vcond<LASX:mode><LASX_2:mode>" +- (match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:LASX_2 4 "register_operand") +- (match_operand:LASX_2 5 "register_operand")) +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (<LASX:MODE>mode) +- == GET_MODE_NUNITS (<LASX_2:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode, +- operands); +- DONE; +-}) +- + ;; Same as vcond_ + (define_expand "vcond_mask_<mode><mode256_i>" + (match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c7480aafd..5cb5bc61f 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -186,9 +186,6 @@ + ;; All vector modes with 128 bits. + (define_mode_iterator LSX V2DF V4SF V2DI V4SI V8HI V16QI) + +-;; Same as LSX. Used by vcond to iterate two modes. +-(define_mode_iterator LSX_2 V2DF V4SF V2DI V4SI V8HI V16QI) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D V2DI V2DF) + +@@ -533,34 +530,6 @@ + DONE; + }) + +-(define_expand "vcondu<LSX:mode><ILSX:mode>" +- (match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:ILSX 4 "register_operand") +- (match_operand:ILSX 5 "register_operand")) +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<ILSX:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands); +- DONE; +-}) +- +-(define_expand "vcond<LSX:mode><LSX_2:mode>" +- (match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- (match_operand:LSX_2 4 "register_operand") +- (match_operand:LSX_2 5 "register_operand")) +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<LSX_2:MODE>mode))" +-{ +- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands); +- DONE; +-}) +- + (define_expand "vcond_mask_<mode><mode_i>" + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "reg_or_m1_operand") +-- +2.43.0 +
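In scalar terms, the deleted vcond{,u} expanders implemented an element-wise select driven by a comparison; GCC now composes the same operation from vec_cmp (produce a mask) and vcond_mask (blend), which is why only the vcond_mask expanders survive. A rough C++ model of the semantics (the real operands are vector modes and op3 is an arbitrary comparison operator, here fixed to '<'):

    #include <cstdio>

    // op0[i] = cmp (op4[i], op5[i]) ? op1[i] : op2[i]
    static void vcond_model (int *op0, const int *op1, const int *op2,
                             const int *op4, const int *op5, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int mask = -(op4[i] < op5[i]);                // vec_cmp: all-ones or zero
          op0[i] = (op1[i] & mask) | (op2[i] & ~mask);  // vcond_mask: blend
        }
    }

    int main ()
    {
      int a[4] = {-3, 2, -8, 5}, z[4] = {0, 0, 0, 0};
      int ones[4] = {-1, -1, -1, -1}, out[4];
      vcond_model (out, ones, z, a, z, 4);
      for (int v : out) std::printf ("%d ", v);         // -1 0 -1 0
    }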
_service:tar_scm:0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
Added
@@ -0,0 +1,51 @@ +From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:05 +0100 +Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames + with no saved registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5 + +If a frame has no saved registers, it can be allocated in one go. +There is no need to treat the areas below and above the saved +registers as separate. + +And if we allocate the frame in one go, it should be allocated +as the initial_adjust rather than the final_adjust. This allows the +frame size to grow to guard_size - guard_used_by_caller before a stack +probe is needed. (A frame with no register saves is necessarily a +leaf frame.) + +This is a no-op as things stand, since a leaf function will have +no outgoing arguments, and so all the frame will be above where +the saved registers normally go. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly + allocate the frame in one go if there are no saved registers. +--- + gcc/config/aarch64/aarch64.cc | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index fbd7a079a..c59af6b1c 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void) + + HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; +- if (frame.frame_size.is_constant (&const_size) +- && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ if (known_eq (frame.saved_regs_size, 0)) ++ frame.initial_adjust = frame.frame_size; ++ else if (frame.frame_size.is_constant (&const_size) ++ && const_size < max_push_offset ++ && known_eq (frame.hard_fp_offset, const_size)) + { + /* Simple, small frame with no outgoing arguments: + +-- +2.33.0 +
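To put numbers on the probing threshold this commit mentions: assuming the usual AArch64 stack-clash defaults of a 64 KiB guard region and 1 KiB reserved for the caller (STACK_CLASH_CALLER_GUARD), a leaf frame with no saved registers can now be allocated as a single initial_adjust of up to 65536 - 1024 = 64512 bytes before the prologue has to emit an explicit probe.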
_service:tar_scm:0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch
Added
@@ -0,0 +1,220 @@ +From d9ce0e85c8cba331413c6a521987a1ecbd94df1c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 8 Aug 2024 09:59:28 +0800 +Subject: [PATCH 184/188] LoongArch: Provide ashr lshr and ashl RTL pattern for + vectors. + +We already support vashr, vlshr and vashl. However, r15-1638 added support +for optimizing x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into +(unsigned) x >> 31. To support this optimization, the vector ashr, lshr and +ashl patterns need to be implemented. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (insn): Added rotatert rotr pairs. + * config/loongarch/simd.md (rotr<mode>3): Remove to ... + (<optab><mode>3): This. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/vect-ashr-lshr.C: New test. +--- + gcc/config/loongarch/loongarch.md | 1 + + gcc/config/loongarch/simd.md | 13 +- + .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++++++++++++++++++ + 3 files changed, 155 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 58c8f28ed..867977b36 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -559,6 +559,7 @@ + (define_code_attr insn [(ashift "sll") + (ashiftrt "sra") + (lshiftrt "srl") ++ (rotatert "rotr") + (ior "or") + (xor "xor") + (and "and") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 00d4c7831..c28b95282 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -306,14 +306,15 @@ + operands[4] = gen_reg_rtx (<MODE>mode); + }); + +-;; <x>vrotri.{b/h/w/d} ++;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d} + +-(define_insn "rotr<mode>3" ++(define_insn "<optab><mode>3" + [(set (match_operand:IVEC 0 "register_operand" "=f") +- (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") +- (match_operand:SI 2 "const_<bitimm>_operand")))] +- "" +- "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2"; ++ (shift_w:IVEC ++ (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:SI 2 "const_<bitimm>_operand")))] ++ "ISA_HAS_LSX" ++ "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "<MODE>")]) + +diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +new file mode 100644 +index 000000000..bcef985fa +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +@@ -0,0 +1,147 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O2" } */ ++/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */ ++ ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef signed char v32qi __attribute__((vector_size(32))); ++typedef short v8hi __attribute__((vector_size(16))); ++typedef short v16hi __attribute__((vector_size(32))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef int v8si __attribute__((vector_size(32))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef long long v4di __attribute__((vector_size(32))); ++ ++v16qi ++foo (v16qi a) ++{ ++ v16qi const1_op = 
__extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo2 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo3 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo4 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo5 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){1,1,1,1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo6 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo7 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){1,1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo8 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){1,1,1,1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16qi ++foo9 (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo10 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo11 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo12 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo13 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){-1,-1,-1,-1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo14 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
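The r15-1638 optimization that motivates this patch rests on a scalar identity, applied lane-wise by the vsrai/vsrli patterns above. A self-checking C++ sketch (assuming arithmetic right shift of negative values, which C++20 guarantees and GCC has always provided):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main ()
    {
      for (int32_t x : { INT32_MIN, -5, 0, 7, INT32_MAX })
        {
          // Arithmetic shift smears the sign bit across the whole word.
          assert ((x < 0 ? -1 : 0) == (x >> 31));                     // vsrai.w per lane
          // Logical shift keeps only the sign bit.
          assert ((x < 0 ? 1 : 0) == (int32_t) ((uint32_t) x >> 31)); // vsrli.w per lane
        }
    }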
_service:tar_scm:0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
Added
@@ -0,0 +1,236 @@ +From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:05 +0100 +Subject: PATCH 086/157 BackportSME aarch64: Add bytes_below_saved_regs + to frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976 + +The frame layout code currently hard-codes the assumption that +the number of bytes below the saved registers is equal to the +size of the outgoing arguments. This patch abstracts that +value into a new field of aarch64_frame. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, + and use it instead of crtl->outgoing_args_size. + (aarch64_get_separate_components): Use bytes_below_saved_regs instead + of outgoing_args_size. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- + gcc/config/aarch64/aarch64.h | 5 +++ + 2 files changed, 41 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index c59af6b1c..5533dd85b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + ++ frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small + offset range. These saves happen below the hard frame pointer. */ +@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void) + + poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; + +- poly_int64 above_outgoing_args ++ poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + + frame.hard_fp_offset +- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; ++ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +- gcc_assert (multiple_p (crtl->outgoing_args_size, ++ gcc_assert (multiple_p (frame.bytes_below_saved_regs, + STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; ++ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + + frame.locals_offset = frame.saved_varargs_size; + +@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; +@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void) + && const_size < max_push_offset + && known_eq (frame.hard_fp_offset, const_size)) + { +- /* Simple, small frame with no outgoing arguments: ++ /* Simple, small frame with no data below the saved registers. + + stp reg1, reg2, sp, -frame_size! 
+ stp reg3, reg4, sp, 16 */ + frame.callee_adjust = const_size; + } +- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) ++ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) + && frame.saved_regs_size.is_constant (&const_saved_regs_size) +- && const_outgoing_args_size + const_saved_regs_size < 512 +- /* We could handle this case even with outgoing args, provided +- that the number of args left us with valid offsets for all +- predicate and vector save slots. It's such a rare case that +- it hardly seems worth the effort though. */ +- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) ++ && const_below_saved_regs + const_saved_regs_size < 512 ++ /* We could handle this case even with data below the saved ++ registers, provided that that data left us with valid offsets ++ for all predicate and vector save slots. It's such a rare ++ case that it hardly seems worth the effort though. */ ++ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca + && frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset)) + { +- /* Frame with small outgoing arguments: ++ /* Frame with small area below the saved registers: + + sub sp, sp, frame_size +- stp reg1, reg2, sp, outgoing_args_size +- stp reg3, reg4, sp, outgoing_args_size + 16 */ ++ stp reg1, reg2, sp, bytes_below_saved_regs ++ stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_outgoing_args_size; ++ frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void) + + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = (frame.hard_fp_offset + + frame.below_hard_fp_saved_regs_size); +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) + { +- /* Frame with large outgoing arguments or SVE saves, but with +- a small local area: ++ /* Frame with large area below the saved registers, or with SVE saves, ++ but with a small area above: + + stp reg1, reg2, sp, -hard_fp_offset! 
+ stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else + { +- /* Frame with large local area and outgoing arguments or SVE saves, +- using frame pointer: ++ /* General case: + + sub sp, sp, hard_fp_offset + stp x29, x30, sp, 0 +@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void) + stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.hard_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + + /* Make sure the individual adjustments add up to the full frame size. */ +@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset2 -= frame.below_hard_fp_saved_regs_size; + else +- offset2 += crtl->outgoing_args_size; ++ offset2 += frame.bytes_below_saved_regs; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + registers. If POLY_SIZE is not large enough to require a probe this function + will only adjust the stack. When allocating the stack space + FRAME_RELATED_P is then used to indicate if the allocation is frame related. +- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing +- arguments. If we are then we ensure that any allocation larger than the ABI
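A simplified sketch of the layout the new field describes, assuming the usual downward-growing AArch64 frame (locals and varargs omitted):

    |  ...                        |  <- incoming SP (top of frame)
    |  saved registers            |
    +-----------------------------+  <- bottom of the saved registers
    |  bytes_below_saved_regs     |     today this region is exactly the
    |  (outgoing argument area)   |     outgoing arguments, hence the old
    +-----------------------------+     use of crtl->outgoing_args_size
       <- outgoing SP (bottom of the static frame)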
_service:tar_scm:0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch
Added
@@ -0,0 +1,203 @@ +From 7e8e122306feaecf8d7b520b4e7c0b9908ca6fd2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 4 Jul 2024 02:49:28 +0800 +Subject: PATCH 185/188 LoongArch: Implement scalar isinf, isnormal, and + isfinite via fclass + +Doing so can avoid loading FP constants from the memory. It also +partially fixes PR 66262 as fclass does not signal on sNaN. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f") + alternative and use movfr2gr.s for it. The spec clearly states + movfr2gr.s sign extends the value to GRLEN. + (fclass_<fmt>): Make the result SImode instead of a floating + mode. The fclass results are really not FP values. + (FCLASS_MASK): New define_int_iterator. + (fclass_optab): New define_int_attr. + (<FCLASS_MASK:fclass_optab><ANYF:mode>): New define_expand + template. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/fclass-compile.c: New test. + * gcc.target/loongarch/fclass-run.c: New test. +--- + gcc/config/loongarch/loongarch.md | 53 ++++++++++++++++--- + .../gcc.target/loongarch/fclass-compile.c | 20 +++++++ + .../gcc.target/loongarch/fclass-run.c | 53 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 867977b36..15960a79f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1851,16 +1851,17 @@ + ;; .................... + + (define_insn "extendsidi2" +- (set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k"))) ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f"))) + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + ldptr.w\t%0,%1 + ld.w\t%0,%1 +- ldx.w\t%0,%1" +- (set_attr "move_type" "sll0,load,load,load") ++ ldx.w\t%0,%1 ++ movfr2gr.s\t%0,%1" ++ (set_attr "move_type" "sll0,load,load,load,mftg") + (set_attr "mode" "DI")) + + (define_insn "extend<SHORT:mode><GPR:mode>2" +@@ -4110,14 +4111,52 @@ + "movgr2fcsr\t$r%0,%1") + + (define_insn "fclass_<fmt>" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") +- UNSPEC_FCLASS)) ++ (set (match_operand:SI 0 "register_operand" "=f") ++ (unspec:SI (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_FCLASS)) + "TARGET_HARD_FLOAT" + "fclass.<fmt>\t%0,%1" + (set_attr "type" "unknown") + (set_attr "mode" "<MODE>")) + ++(define_int_iterator FCLASS_MASK 68 136 952) ++(define_int_attr fclass_optab ++ (68 "isinf") ++ (136 "isnormal") ++ (952 "isfinite")) ++ ++(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++ (match_operand:SI 0 "register_operand" "=r") ++ (match_operand:ANYF 1 "register_operand" " f") ++ (const_int FCLASS_MASK) ++ "TARGET_HARD_FLOAT" ++ { ++ rtx ft0 = gen_reg_rtx (SImode); ++ rtx t0 = gen_reg_rtx (word_mode); ++ rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++ emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); ++ ++ if (TARGET_64BIT) ++ emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0)); ++ else ++ emit_move_insn (t0, ft0); ++ ++ emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask)); ++ emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx)); ++ ++ if (TARGET_64BIT) ++ { ++ t0 = lowpart_subreg (SImode, t0, DImode); ++ SUBREG_PROMOTED_VAR_P (t0) = 1; ++ 
SUBREG_PROMOTED_SET (t0, SRP_SIGNED); ++ } ++ ++ emit_move_insn (operands0, t0); ++ ++ DONE; ++ }) ++ + (define_insn "bytepick_w_<bytepick_imm>" + (set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +new file mode 100644 +index 000000000..9c24d6e26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler-times "fclass\\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "fclass\\.d" 1 } } */ ++ ++__attribute__ ((noipa)) int ++test_fclass_f (float f) ++{ ++ return __builtin_isinf (f) ++ | __builtin_isnormal (f) << 1 ++ | __builtin_isfinite (f) << 2; ++} ++ ++__attribute__ ((noipa)) int ++test_fclass_d (double d) ++{ ++ return __builtin_isinf (d) ++ | __builtin_isnormal (d) << 1 ++ | __builtin_isfinite (d) << 2; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-run.c b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +new file mode 100644 +index 000000000..e5585f9d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +@@ -0,0 +1,53 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fsignaling-nans -D_GNU_SOURCE -std=c23" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include <fenv.h> ++#include "fclass-compile.c" ++ ++#define ASSERT_EQ(x, y) (void)(x == y || (__builtin_abort (), 1)) ++ ++int ++main (void) ++{ ++ volatile float f_inf = __builtin_inff (); ++ volatile float f_zero = 0; ++ volatile float f_normal = 114.514; ++ volatile float f_subnormal = 1e-40; ++ volatile float f_qnan = __builtin_nanf (""); ++ volatile float f_snan = __builtin_nansf (""); ++ volatile double d_inf = __builtin_inf (); ++ volatile double d_zero = 0; ++ volatile double d_normal = 1919.810; ++ volatile double d_subnormal = 1e-320; ++ volatile double d_qnan = __builtin_nan (""); ++ volatile double d_snan = __builtin_nans (""); ++ ++#if __loongarch_frlen >= 64 ++ /* With fclass.{s/d} we shouldn't signal, even if the input is sNaN. ++ PR 66462. */ ++ feenableexcept (FE_INVALID); ++#endif ++ ++ ASSERT_EQ (test_fclass_f (f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (-f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (-f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (f_qnan), 0); ++ ASSERT_EQ (test_fclass_f (f_snan), 0); ++ ++ ASSERT_EQ (test_fclass_d (d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (-d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (-d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (d_qnan), 0); ++ ASSERT_EQ (test_fclass_d (d_snan), 0);
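The masks 68, 136 and 952 decode cleanly if one assumes the fclass result-bit layout from the LoongArch ISA manual (bit 0 sNaN, 1 qNaN, 2 -Inf, 3 -normal, 4 -subnormal, 5 -zero, and bits 6-9 the positive counterparts). A small self-checking C++ sketch of that decomposition:

    #include <cstdio>

    // Assumed fclass.{s,d} result bits, per the LoongArch ISA manual.
    enum : unsigned {
      SNAN = 1u << 0, QNAN = 1u << 1,
      NINF = 1u << 2, NNORM = 1u << 3, NSUB = 1u << 4, NZERO = 1u << 5,
      PINF = 1u << 6, PNORM = 1u << 7, PSUB = 1u << 8, PZERO = 1u << 9,
    };

    static_assert ((NINF | PINF) == 68, "isinf mask");
    static_assert ((NNORM | PNORM) == 136, "isnormal mask");
    static_assert ((NNORM | NSUB | NZERO | PNORM | PSUB | PZERO) == 952,
                   "isfinite mask");

    int main () { std::puts ("fclass masks check out"); }

Note that NaNs set none of the bits any of the three masks test, so the expander's "AND then compare with zero" sequence returns false for both qNaN and sNaN without raising an exception.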
_service:tar_scm:0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
Added
@@ -0,0 +1,87 @@ +From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 087/157 BackportSME aarch64: Add bytes_below_hard_fp to + frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8 + +Following on from the previous bytes_below_saved_regs patch, this one +records the number of bytes that are below the hard frame pointer. +This eventually replaces below_hard_fp_saved_regs_size. + +If a frame pointer is not needed, the epilogue adds final_adjust +to the stack pointer before restoring registers: + + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + +Therefore, if the epilogue needs to restore the stack pointer from +the hard frame pointer, the directly corresponding offset is: + + -bytes_below_hard_fp + final_adjust + +i.e. go from the hard frame pointer to the bottom of the frame, +then add the same amount as if we were using the stack pointer +from the outset. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. + (aarch64_expand_epilogue): Use it instead of + below_hard_fp_saved_regs_size. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 5 +++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5533dd85b..2bb49b9b0 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void) + of the callee save area. */ + bool saves_below_hard_fp_p = maybe_ne (offset, 0); + frame.below_hard_fp_saved_regs_size = offset; ++ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 final_adjust = frame.final_adjust; + poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; + unsigned reg2 = frame.wb_pop_candidate2; + unsigned int last_gpr = (frame.is_scs_enabled +@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + is restored on the instruction doing the writeback. */ + aarch64_add_offset (Pmode, stack_pointer_rtx, + hard_frame_pointer_rtx, +- -callee_offset - below_hard_fp_saved_regs_size, ++ -bytes_below_hard_fp + final_adjust, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 9e0ca380e..dedc5b32f 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the hard frame pointer. This value is ++ always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_hard_fp; ++ + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. 
This value is always a multiple of + STACK_BOUNDARY. */ +-- +2.33.0 +
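A quick sanity check of the new epilogue expression: with 32 bytes of outgoing arguments (so bytes_below_saved_regs = 32), 16 bytes of SVE saves below the hard FP, and final_adjust = 32, we get bytes_below_hard_fp = 16 + 32 = 48 and a restore offset of -48 + 32 = -16 from the hard frame pointer. That equals the old -callee_offset - below_hard_fp_saved_regs_size because, on this code path, callee_offset is 0 and final_adjust equals bytes_below_saved_regs.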
_service:tar_scm:0186-LoongArch-Add-support-to-annotate-tablejump.patch
Added
@@ -0,0 +1,155 @@ +From 5079c41ada379bd8d1bdb92dd2b91e72e9496ea6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 11 Jul 2024 19:43:48 +0800 +Subject: PATCH 186/188 LoongArch: Add support to annotate tablejump + +This is per the request from the kernel developers. For generating the +ORC unwind info, the objtool program needs to analysis the control flow +of a .o file. If a jump table is used, objtool has to correlate the +jump instruction with the table. + +On x86 (where objtool was initially developed) it's simple: a relocation +entry natrually correlates them because one single instruction is used +for table-based jump. But on an RISC machine objtool would have to +reconstruct the data flow if it must find out the correlation on its +own. + +So, emit an additional section to store the correlation info as pairs of +addresses, each pair contains the address of a jump instruction (jr) and +the address of the jump table. This is very trivial to implement in +GCC. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in + (mannotate-tablejump): New option. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.md (tablejump<mode>): Emit + additional correlation info between the jump instruction and the + jump table, if -mannotate-tablejump. + * doc/invoke.texi: Document -mannotate-tablejump. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c: New test. + +Suggested-by: Tiezhu Yang <yangtiezhu@loongson.cn> +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch.md | 12 +++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++++ + gcc/doc/invoke.texi | 13 ++++++++++++- + .../gcc.target/loongarch/jump-table-annotate.c | 15 +++++++++++++++ + 5 files changed, 46 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 0ecd10922..20795f6bd 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -301,3 +301,7 @@ default value is 4. + ; CPUCFG independently, so we use bit flags to specify them. + TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 ++ ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 15960a79f..66236a7c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3496,12 +3496,22 @@ + DONE; + }) + ++(define_mode_attr mode_size (DI "8") (SI "4")) ++ + (define_insn "@tablejump<mode>" + (set (pc) + (match_operand:P 0 "register_operand" "e")) + (use (label_ref (match_operand 1 "" ""))) + "" +- "jr\t%0" ++ { ++ return TARGET_ANNOTATE_TABLEJUMP ++ ? "1:jr\t%0\n\t" ++ ".pushsection\t.discard.tablejump_annotate\n\t" ++ "\t.<mode_size>byte\t1b\n\t" ++ "\t.<mode_size>byte\t%1\n\t" ++ ".popsection" ++ : "jr\t%0"; ++ } + (set_attr "type" "jump") + (set_attr "mode" "none")) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 69b3b965c..16fed6ec3 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -310,6 +310,10 @@ default value is 4. 
+ TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 + ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f6d59317b..d2c52cdf4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1011,7 +1011,7 @@ Objective-C and Objective-C++ Dialects}. + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +--mtls-dialect=@var{opt}} ++-mtls-dialect=@var{opt} -mannotate-tablejump -mno-annotate-tablejump} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24750,6 +24750,17 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + This option controls which tls dialect may be used for general dynamic and + local dynamic TLS models. + ++@opindex mannotate-tablejump ++@opindex mno-annotate-tablejump ++@item -mannotate-tablejump ++@itemx -mno-annotate-tablejump ++Create an annotation section @code{.discard.tablejump_annotate} to ++correlate the @code{jirl} instruction and the jump table when a jump ++table is used to optimize the @code{switch} statement. Some external ++tools, for example @file{objtool} of the Linux kernel building system, ++need the annotation to analysis the control flow. The default is ++@option{-mno-annotate-tablejump}. ++ + @table @samp + @item trad + Use traditional TLS. This is the default. +diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +new file mode 100644 +index 000000000..9d58e60e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mannotate-tablejump" } */ ++ ++extern void asdf(int); ++void foo(int x) { ++ switch (x) { ++ case 0: asdf(10); break; ++ case 1: asdf(11); break; ++ case 2: asdf(12); break; ++ case 3: asdf(13); break; ++ case 4: asdf(14); break; ++ } ++} ++ ++/* { dg-final { scan-assembler "\\.discard\\.tablejump_annotate" } } */ +-- +2.43.0 +
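Concretely, each record in .discard.tablejump_annotate is a pair of GRLEN-sized addresses: the jr instruction first, then its jump table. A hypothetical consumer-side sketch in C++ (the struct name and the extraction of the section are assumptions, not part of the patch):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // One record on a 64-bit LoongArch object: ".8byte 1b" then ".8byte %1".
    struct tablejump_annotation
    {
      uint64_t jump_insn;   // address of the jr instruction (the "1:" label)
      uint64_t jump_table;  // address of the jump table it indexes
    };

    // How an objtool-like tool could walk the records once the section is read.
    static void dump (const tablejump_annotation *recs, std::size_t n)
    {
      for (std::size_t i = 0; i < n; ++i)
        std::printf ("jr @ %#llx -> table @ %#llx\n",
                     (unsigned long long) recs[i].jump_insn,
                     (unsigned long long) recs[i].jump_table);
    }

    int main ()
    {
      tablejump_annotation demo[] = { { 0x120000a30u, 0x120004f00u } }; // made-up addresses
      dump (demo, 1);
    }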
_service:tar_scm:0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
Added
@@ -0,0 +1,126 @@ +From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 20 Jun 2023 21:48:38 +0100 +Subject: PATCH 088/157 BackportSME aarch64: Robustify stack tie handling + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb + +The SVE handling of stack clash protection copied the stack +pointer to X11 before the probe and set up X11 as the CFA +for unwind purposes: + + /* This is done to provide unwinding information for the stack + adjustments we're about to do, however to prevent the optimizers + from removing the R11 move and leaving the CFA note (which would be + very wrong) we tie the old and new stack pointer together. + The tie will expand to nothing but the optimizers will not touch + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); + emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. */ + add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); + RTX_FRAME_RELATED_P (insn) = 1; + +-fcprop-registers is now smart enough to realise that X11 = SP, +replace X11 with SP in the stack tie, and delete the instruction +created above. + +This patch tries to prevent that by making stack_tie fussy about +the register numbers. It fixes failures in +gcc.target/aarch64/sve/pcs/stack_clash*.c. + +gcc/ + * config/aarch64/aarch64.md (stack_tie): Hard-code the first + register operand to the stack pointer. Require the second register + operand to have the number specified in a separate const_int operand. + * config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function. + (aarch64_allocate_and_probe_stack_space): Use it. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++---- + gcc/config/aarch64/aarch64.md | 7 ++++--- + 2 files changed, 18 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 2bb49b9b0..4d505c6fc 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + return STACK_CLASH_CALLER_GUARD; + } + ++/* Emit a stack tie that acts as a scheduling barrier for all previous and ++ subsequent memory accesses and that requires the stack pointer and REG ++ to have their current values. REG can be stack_pointer_rtx if no ++ other register's value needs to be fixed. */ ++ ++static void ++aarch64_emit_stack_tie (rtx reg) ++{ ++ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode))); ++} + + /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch + registers. If POLY_SIZE is not large enough to require a probe this function +@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); +- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); ++ aarch64_emit_stack_tie (stack_ptr_copy); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. 
*/
+@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void)
+ 	  aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
+ 				      hard_frame_pointer_rtx, 0);
+ 	}
+-      emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
++      aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+     }
+ 
+   aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
+@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+       || cfun->calls_alloca
+       || crtl->calls_eh_return)
+     {
+-      emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++      aarch64_emit_stack_tie (stack_pointer_rtx);
+       need_barrier_p = false;
+     }
+ 
+@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+ 				callee_adjust != 0, &cfi_ops);
+ 
+   if (need_barrier_p)
+-    emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++    aarch64_emit_stack_tie (stack_pointer_rtx);
+ 
+   if (callee_adjust != 0)
+     aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 2becc888e..2ce123255 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -7088,10 +7088,11 @@
+ 
+ (define_insn "stack_tie"
+   [(set (mem:BLK (scratch))
+-	(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
+-		     (match_operand:DI 1 "register_operand" "rk")]
++	(unspec:BLK [(reg:DI SP_REGNUM)
++		     (match_operand:DI 0 "register_operand" "rk")
++		     (match_operand:DI 1 "const_int_operand")]
+ 		    UNSPEC_PRLG_STK))]
+-  ""
++  "REGNO (operands[0]) == INTVAL (operands[1])"
+   ""
+   [(set_attr "length" "0")]
+ )
+--
+2.33.0
+
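For context, a hedged C sketch (not from the patch) of one situation the tie protects: alloca forces the epilogue to restore SP from the frame pointer, and the stack_tie barrier keeps later memory accesses from being scheduled across that restore.

#include <alloca.h>
#include <string.h>

/* Sketch only: alloca forces a frame pointer, so the epilogue restores SP
   from it and emits a stack tie (now via aarch64_emit_stack_tie) so that
   loads and stores cannot move above the restore.  */
int
sum_n (const int *src, int n)
{
  int *buf = alloca (n * sizeof *buf);
  memcpy (buf, src, n * sizeof *buf);
  int s = 0;
  for (int i = 0; i < n; i++)
    s += buf[i];
  return s;
}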
_service:tar_scm:0187-LoongArch-Fix-up-r15-4130.patch
Added
@@ -0,0 +1,32 @@
+From 8cdf96cd61612746262a811b8a091ecab27bd3a1 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 10 Jul 2024 12:15:23 +0800
+Subject: [PATCH 187/188] LoongArch: Fix up r15-4130
+
+An earlier version of the patch (lacking the regeneration of some files)
+was pushed. Fix it up now.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch.opt.urls: Regenerate.
+---
+ gcc/config/loongarch/loongarch.opt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 16fed6ec3..f9c7bd446 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -312,7 +312,7 @@ HOST_WIDE_INT la_isa_evolution = 0
+ 
+ mannotate-tablejump
+ Target Mask(ANNOTATE_TABLEJUMP) Save
+-Annotate table jump instruction (jr {reg}) to correlate it with the jump table
++Annotate table jump instruction (jr {reg}) to correlate it with the jump table.
+ 
+ mfrecipe
+ Target Mask(ISA_FRECIPE) Var(la_isa_evolution)
+--
+2.43.0
+
_service:tar_scm:0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
Added
@@ -0,0 +1,228 @@ +From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 089/157 BackportSME aarch64: Tweak + aarch64_save/restore_callee_saves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db + +aarch64_save_callee_saves and aarch64_restore_callee_saves took +a parameter called start_offset that gives the offset of the +bottom of the saved register area from the current stack pointer. +However, it's more convenient for later patches if we use the +bottom of the entire frame as the reference point, rather than +the bottom of the saved registers. + +Doing that removes the need for the callee_offset field. +Other than that, this is not a win on its own. It only really +makes sense in combination with the follow-on patches. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove + callee_offset handling. + (aarch64_save_callee_saves): Replace the start_offset parameter + with a bytes_below_sp parameter. + (aarch64_restore_callee_saves): Likewise. + (aarch64_expand_prologue): Update accordingly. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ + gcc/config/aarch64/aarch64.h | 4 --- + 2 files changed, 28 insertions(+), 32 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4d505c6fc..a0a4c7ac3 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void) + frame.final_adjust = 0; + frame.callee_adjust = 0; + frame.sve_callee_adjust = 0; +- frame.callee_offset = 0; + + frame.wb_pop_candidate1 = frame.wb_push_candidate1; + frame.wb_pop_candidate2 = frame.wb_push_candidate2; +@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void) + stp reg1, reg2, sp, bytes_below_saved_regs + stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + } + + /* Emit code to save the callee-saved registers from register number START +- to LIMIT to the stack at the location starting at offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P +- is true if the hard frame pointer has been set up. */ ++ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard ++ frame pointer has been set up. 
*/ + + static void +-aarch64_save_callee_saves (poly_int64 start_offset, ++aarch64_save_callee_saves (poly_int64 bytes_below_sp, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { +@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + else if (GP_REGNUM_P (regno) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { +- gcc_assert (known_eq (start_offset, 0)); +- poly_int64 fp_offset +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, + } + + /* Emit code to restore the callee registers from register number START +- up to and including LIMIT. Restore from the stack offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. Write the +- appropriate REG_CFA_RESTORE notes into CFI_OPS. */ ++ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE ++ notes into CFI_OPS. */ + + static void +-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, ++aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { + aarch64_frame &frame = cfun->machine->frame; +@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void) + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; +@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void) + - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + +- /* The offset of the bottom of the save area from the current SP. */ +- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; ++ /* The offset of the current SP from the bottom of the static frame. 
*/ ++ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { +@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void) + { + reg1 = R29_REGNUM; + reg2 = R30_REGNUM; +- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, ++ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, + false, false); + } + else +@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void) + aarch64_emit_stack_tie (hard_frame_pointer_rtx); + } + +- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + if (maybe_ne (sve_callee_adjust, 0)) +@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void) + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, + !frame_pointer_needed, false); +- saved_regs_offset += sve_callee_adjust; ++ bytes_below_sp -= sve_callee_adjust; + } +- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, + false, emit_frame_chain); +- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ ++ gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); + } +@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; +@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ +- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, + callee_adjust != 0, &cfi_ops); +- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, + false, &cfi_ops);
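The new bookkeeping can be sanity-checked with invented numbers; a standalone sketch (not GCC code) showing that the old start_offset form and the new bytes_below_sp form compute the same SP-relative slot:

#include <stdio.h>
#include <assert.h>

/* Invented values purely to show the two conventions agree.  */
int
main (void)
{
  long reg_offset = 16;               /* slot within the save area */
  long bytes_below_saved_regs = 64;   /* outgoing args below the saves */
  long bytes_below_sp = 64;           /* SP sits this far above frame bottom */
  long start_offset = bytes_below_saved_regs - bytes_below_sp;

  long old_offset = start_offset + reg_offset;
  long new_offset = reg_offset + bytes_below_saved_regs - bytes_below_sp;
  assert (old_offset == new_offset);
  printf ("str reg, [sp, #%ld]\n", new_offset);
  return 0;
}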
_service:tar_scm:0188-libphobos-Update-build-scripts-for-LoongArch64.patch
Added
@@ -0,0 +1,304 @@ +From 46e279e1c79086e930965c9a15d08b70a2c06a80 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 28 Oct 2024 01:53:57 +0000 +Subject: PATCH 188/188 libphobos: Update build scripts for LoongArch64. + +libphobos/ChangeLog: + + * m4/druntime/cpu.m4: Support loongarch* targets. + * libdruntime/Makefile.am: Same. + * libdruntime/Makefile.in: Regenerate. + * configure: Regenerate. +--- + libphobos/configure | 21 ++++++- + libphobos/libdruntime/Makefile.am | 3 + + libphobos/libdruntime/Makefile.in | 94 +++++++++++++++++++------------ + libphobos/m4/druntime/cpu.m4 | 5 ++ + 4 files changed, 85 insertions(+), 38 deletions(-) + +diff --git a/libphobos/configure b/libphobos/configure +index 9da06f087..6acb2dd89 100755 +--- a/libphobos/configure ++++ b/libphobos/configure +@@ -696,6 +696,8 @@ DRUNTIME_CPU_POWERPC_FALSE + DRUNTIME_CPU_POWERPC_TRUE + DRUNTIME_CPU_MIPS_FALSE + DRUNTIME_CPU_MIPS_TRUE ++DRUNTIME_CPU_LOONGARCH_FALSE ++DRUNTIME_CPU_LOONGARCH_TRUE + DRUNTIME_CPU_ARM_FALSE + DRUNTIME_CPU_ARM_TRUE + DRUNTIME_CPU_AARCH64_FALSE +@@ -11750,7 +11752,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11753 "configure" ++#line 11755 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -11856,7 +11858,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11859 "configure" ++#line 11861 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -14137,6 +14139,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; + mips*) druntime_target_cpu_parsed="mips" + ;; ++ loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + powerpc*) + druntime_target_cpu_parsed="powerpc" + ;; +@@ -14174,6 +14179,14 @@ else + DRUNTIME_CPU_MIPS_FALSE= + fi + ++ if test "$druntime_target_cpu_parsed" = "loongarch"; then ++ DRUNTIME_CPU_LOONGARCH_TRUE= ++ DRUNTIME_CPU_LOONGARCH_FALSE='#' ++else ++ DRUNTIME_CPU_LOONGARCH_TRUE='#' ++ DRUNTIME_CPU_LOONGARCH_FALSE= ++fi ++ + if test "$druntime_target_cpu_parsed" = "powerpc"; then + DRUNTIME_CPU_POWERPC_TRUE= + DRUNTIME_CPU_POWERPC_FALSE='#' +@@ -15738,6 +15751,10 @@ if test -z "${DRUNTIME_CPU_MIPS_TRUE}" && test -z "${DRUNTIME_CPU_MIPS_FALSE}"; + as_fn_error $? "conditional \"DRUNTIME_CPU_MIPS\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${DRUNTIME_CPU_LOONGARCH_TRUE}" && test -z "${DRUNTIME_CPU_LOONGARCH_FALSE}"; then ++ as_fn_error $? "conditional \"DRUNTIME_CPU_LOONGARCH\" was never defined. ++Usually this means the macro was only invoked conditionally." "$LINENO" 5 ++fi + if test -z "${DRUNTIME_CPU_POWERPC_TRUE}" && test -z "${DRUNTIME_CPU_POWERPC_FALSE}"; then + as_fn_error $? "conditional \"DRUNTIME_CPU_POWERPC\" was never defined. + Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am +index 6ca4012b7..65e3f1b44 100644 +--- a/libphobos/libdruntime/Makefile.am ++++ b/libphobos/libdruntime/Makefile.am +@@ -86,6 +86,9 @@ endif + if DRUNTIME_CPU_MIPS + DRUNTIME_SOURCES_CONFIGURED += config/mips/switchcontext.S + endif ++if DRUNTIME_CPU_LOONGARCH ++ DRUNTIME_SOURCES_CONFIGURED += config/loongarch/switchcontext.S ++endif + if DRUNTIME_CPU_POWERPC + DRUNTIME_SOURCES_CONFIGURED += config/powerpc/switchcontext.S + endif +diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in +index f7f78d71f..91cd65362 100644 +--- a/libphobos/libdruntime/Makefile.in ++++ b/libphobos/libdruntime/Makefile.in +@@ -124,12 +124,13 @@ target_triplet = @target@ + # CPU specific sources + @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S + @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S +-@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S +-@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S +-@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S ++@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S ++@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S ++@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S + subdir = libdruntime + ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -474,45 +475,49 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \ + @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22) + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo +-am__objects_32 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo 
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo ++am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_21) \ + $(am__objects_23) $(am__objects_24) $(am__objects_25) \ + $(am__objects_26) $(am__objects_27) $(am__objects_28) \ +- $(am__objects_29) $(am__objects_30) $(am__objects_31) +-am__objects_33 = gcc/config.lo gcc/libbacktrace.lo +-am__objects_34 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ +- $(am__objects_32) $(am__objects_33) +-am_libgdruntime_la_OBJECTS = $(am__objects_34) ++ $(am__objects_29) $(am__objects_30) $(am__objects_31) \ ++ $(am__objects_32) ++am__objects_34 = gcc/config.lo gcc/libbacktrace.lo ++am__objects_35 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ ++ $(am__objects_33) $(am__objects_34) ++am_libgdruntime_la_OBJECTS = $(am__objects_35) + libgdruntime_la_OBJECTS = $(am_libgdruntime_la_OBJECTS) + am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +-am__objects_35 = core/stdc/libgdruntime_convenience_la-errno_.lo +-@DRUNTIME_OS_MINGW_TRUE@am__objects_36 = $(am__objects_20) \ ++am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo ++@DRUNTIME_OS_MINGW_TRUE@am__objects_37 = $(am__objects_20) \ + @DRUNTIME_OS_MINGW_TRUE@ config/mingw/libgdruntime_convenience_la-msvc.lo +-@DRUNTIME_CPU_AARCH64_TRUE@am__objects_37 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_ARM_TRUE@am__objects_38 = config/arm/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_39 = config/mips/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_40 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_41 = config/mingw/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_42 = config/x86/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_43 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_44 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo +-am__objects_45 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++am__objects_47 = 
$(am__objects_5) $(am__objects_7) $(am__objects_9) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_36) \ + $(am__objects_23) $(am__objects_37) $(am__objects_38) \ + $(am__objects_39) $(am__objects_40) $(am__objects_41) \ +- $(am__objects_42) $(am__objects_43) $(am__objects_44) +-am__objects_46 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ +- $(am__objects_45) $(am__objects_33) +-am__objects_47 = $(am__objects_46) +-am_libgdruntime_convenience_la_OBJECTS = $(am__objects_47) ++ $(am__objects_42) $(am__objects_43) $(am__objects_44) \ ++ $(am__objects_45) $(am__objects_46) ++am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ ++ $(am__objects_47) $(am__objects_33) ++am__objects_49 = $(am__objects_48) ++am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49) + libgdruntime_convenience_la_OBJECTS = \
_service:tar_scm:0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
Added
@@ -0,0 +1,47 @@
+From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:07 +0100
+Subject: [PATCH 090/157] [Backport][SME] aarch64: Only calculate chain_offset
+ if there is a chain
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645
+
+After previous patches, it is no longer necessary to calculate
+a chain_offset in cases where there is no chain record.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the
+	calculation of chain_offset into the emit_frame_chain block.
+---
+ gcc/config/aarch64/aarch64.cc | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a0a4c7ac3..bef6a658b 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void)
+   if (callee_adjust != 0)
+     aarch64_push_regs (reg1, reg2, callee_adjust);
+ 
+-  /* The offset of the frame chain record (if any) from the current SP.  */
+-  poly_int64 chain_offset = (initial_adjust + callee_adjust
+-			     - frame.hard_fp_offset);
+-  gcc_assert (known_ge (chain_offset, 0));
+-
+   /* The offset of the current SP from the bottom of the static frame.  */
+   poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ 
+   if (emit_frame_chain)
+     {
++      /* The offset of the frame chain record (if any) from the current SP.  */
++      poly_int64 chain_offset = (initial_adjust + callee_adjust
++				 - frame.hard_fp_offset);
++      gcc_assert (known_ge (chain_offset, 0));
++
+       if (callee_adjust == 0)
+	 {
+	   reg1 = R29_REGNUM;
+--
+2.33.0
+
_service:tar_scm:0189-LoongArch-fix-building-errors.patch
Added
@@ -0,0 +1,273 @@ +From 142ae446cab26f1beb81a53a7da3c477ce42df40 Mon Sep 17 00:00:00 2001 +From: Peng Fan <fanpeng@loongson.cn> +Date: Mon, 28 Oct 2024 09:02:51 +0000 +Subject: PATCH LoongArch: fix building errors. + +--- + config/mt-loongarch-mlib | 2 +- + gcc/config/loongarch/loongarch-evolution.h | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 1 + + gcc/config/loongarch/loongarch-str.h | 11 +++--- + gcc/config/loongarch/loongarch.cc | 9 +---- + gcc/config/loongarch/loongarch.md | 44 ++++++++++++++++------ + gcc/config/loongarch/simd.md | 15 +++++--- + gcc/doc/invoke.texi | 3 +- + 8 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib +index 4cfe568f1..bbbba277f 100644 +--- a/config/mt-loongarch-mlib ++++ b/config/mt-loongarch-mlib +@@ -1 +1 @@ +-FLAGS_FOR_TARGET += -fmultiflags ++FLAGS_FOR_TARGET += +diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h +index d64996481..7e8e602c7 100644 +--- a/gcc/config/loongarch/loongarch-evolution.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -1,7 +1,7 @@ + /* Generated automatically by "genstr" from "isa-evolution.in". + Please do not edit this file directly. + +- Copyright (C) 2023 Free Software Foundation, Inc. ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. + + This file is part of GCC. + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 735daeb7c..1d08bb6a1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -1071,6 +1071,7 @@ loongarch_init_misc_options (struct gcc_options *opts, + + #undef INIT_TARGET_FLAG + ++#define TARGET_DIRECT_EXTERN_ACCESS_OPTS_P(opts) (((opts->x_target_flags) & MASK_DIRECT_EXTERN_ACCESS) != 0) + /* Set mexplicit-relocs default. */ + if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) + opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 3cbe12f7b..13d161a8c 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -66,9 +66,10 @@ along with GCC; see the file COPYING3. If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_FRECIPE "frecipe" +-#define OPTSTR_DIV32 "div32" +-#define OPTSTR_LAM_BH "lam-bh" +-#define OPTSTR_LAMCAS "lamcas" +-#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++#define OPTSTR_FRECIPE "frecipe" ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 53bd8d7ec..6be0d80b3 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -764,14 +764,7 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- +- /* For a C23 variadic function w/o any named argument, and w/o an +- artifical argument for large return value, skip advancing args. +- There is such an artifical argument iff. arg.type is non-NULL +- (PR 114175). 
*/ +- if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) +- || arg.type != NULL_TREE) +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 66236a7c7..d8d444c7a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -32,6 +32,7 @@ + UNSPEC_FCLASS + UNSPEC_FMAX + UNSPEC_FMIN ++ UNSPEC_COPYSIGN + UNSPEC_FTINT + UNSPEC_FTINTRM + UNSPEC_FTINTRP +@@ -415,11 +416,13 @@ + + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. +-(define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++;; (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++(define_mode_iterator LD_AT_LEAST_32_BIT (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything able to be stored with the same addressing mode as + ;; st.w. +-(define_mode_iterator ST_ANY QHWD ANYF) ++;; (define_mode_iterator ST_ANY QHWD ANYF) ++(define_mode_iterator ST_ANY (QI "") (HI "") (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV (DI "TARGET_64BIT") +@@ -590,6 +593,10 @@ + (define_code_attr sel (eq "masknez") (ne "maskeqz")) + (define_code_attr selinv (eq "maskeqz") (ne "masknez")) + ++(define_int_attr lrint_allow_inexact (UNSPEC_FTINT "1") ++ (UNSPEC_FTINTRM "0") ++ (UNSPEC_FTINTRP "0")) ++ + ;; Iterator and attributes for floating-point to fixed-point conversion + ;; instructions. + (define_int_iterator LRINT UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP) +@@ -625,7 +632,8 @@ + ;; so the redundant sign extension can be removed if the output is used as + ;; an input of a bitwise operation. Note plus, rotl, and div are handled + ;; separately. 
+-(define_code_iterator shift_w any_shift rotatert) ++;; (define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator shift_w ashift ashiftrt lshiftrt rotatert) + (define_code_iterator arith_w minus mult) + + (define_expand "<optab><mode>3" +@@ -1324,8 +1332,9 @@ + + (define_insn "copysign<mode>3" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f"))) ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ UNSPEC_COPYSIGN)) + "TARGET_HARD_FLOAT" + "fcopysign.<fmt>\t%0,%1,%2" + (set_attr "type" "fcopysign") +@@ -2722,12 +2731,13 @@ + (set_attr "mode" "<MODE>")) + + ;; Convert floating-point numbers to integers ++;; (<LRINT> == UNSPEC_FTINT + (define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2" + (set (match_operand:ANYFI 0 "register_operand" "=f") + (unspec:ANYFI (match_operand:ANYF 1 "register_operand" "f") + LRINT)) + "TARGET_HARD_FLOAT && +- (<LRINT> == UNSPEC_FTINT ++ (<lrint_allow_inexact> + || flag_fp_int_builtin_inexact + || !flag_trapping_math)" + "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1" +@@ -4135,15 +4145,26 @@ + (136 "isnormal") + (952 "isfinite")) + +-(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;;(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;; (match_operand:SI 0 "register_operand" "=r") ++;; (match_operand:ANYF 1 "register_operand" " f") ++;; (const_int FCLASS_MASK) ++;; "TARGET_HARD_FLOAT" ++;; { ++;; rtx ft0 = gen_reg_rtx (SImode); ++;; rtx t0 = gen_reg_rtx (word_mode); ++;; rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++(define_expand "fclass_optab<ANYF:mode>2" ++ (unspec:ANYF + (match_operand:SI 0 "register_operand" "=r") +- (match_operand:ANYF 1 "register_operand" " f") +- (const_int FCLASS_MASK) ++ (match_operand:ANYF 1 "register_operand" " f") ++ UNSPEC_FCLASS) + "TARGET_HARD_FLOAT" + { + rtx ft0 = gen_reg_rtx (SImode); + rtx t0 = gen_reg_rtx (word_mode); +- rtx mask = GEN_INT (<FCLASS_MASK>); ++ rtx mask = GEN_INT (GET_MODE_MASK (<MODE>mode)); + + emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); + +@@ -4165,7 +4186,8 @@ + emit_move_insn (operands0, t0); + + DONE;
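For reference, a minimal use of the builtin served by the rewritten copysign<mode>3 pattern on hard-float LoongArch (illustrative only, not part of the patch):

#include <stdio.h>

int
main (void)
{
  double magnitude = 3.5, sign_source = -0.0;
  /* Expands through the copysign<mode>3 pattern (now an UNSPEC_COPYSIGN
     unspec) on hard-float targets; prints -3.500000.  */
  printf ("%f\n", __builtin_copysign (magnitude, sign_source));
  return 0;
}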
_service:tar_scm:0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
Added
@@ -0,0 +1,94 @@ +From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:07 +0100 +Subject: PATCH 091/157 BackportSME aarch64: Rename locals_offset to + bytes_above_locals +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854 + +locals_offset was described as: + + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of + STACK_BOUNDARY. */ + +This is implicitly an “upside down” view of the frame: the incoming +SP is at offset 0, and anything N bytes below the incoming SP is at +offset N (rather than -N). + +However, reg_offset instead uses a “right way up” view; that is, +it views offsets in address terms. Something above X is at a +positive offset from X and something below X is at a negative +offset from X. + +Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, +target-independent code views offsets in address terms too: +locals are allocated at negative offsets to virtual_stack_vars. + +It seems confusing to have *_offset fields of the same structure +using different polarities like this. This patch tries to avoid +that by renaming locals_offset to bytes_above_locals. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... + (aarch64_frame::bytes_above_locals): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_initial_elimination_offset): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index bef6a658b..992f71bbd 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void) + STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + +- frame.locals_offset = frame.saved_varargs_size; ++ frame.bytes_above_locals = frame.saved_varargs_size; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.locals_offset; ++ return frame.hard_fp_offset - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return frame.frame_size - frame.locals_offset; ++ return frame.frame_size - frame.bytes_above_locals; + } + + return frame.frame_size; +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index a1db4f689..2acff9a96 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame + always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_hard_fp; + +- /* Offset from the base of the frame (incomming SP) to the +- top of the locals area. This value is always a multiple of ++ /* The number of bytes between the top of the locals area and the top ++ of the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 locals_offset; ++ poly_int64 bytes_above_locals; + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. 
This value is always a multiple of +-- +2.33.0 +
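A worked example of the polarity change, with invented numbers: viewing the incoming SP as address 0, locals that start 32 bytes below it sit at address -32, while the field itself stores +32; the new name makes that reading explicit.

#include <stdio.h>

int
main (void)
{
  long incoming_sp = 0;           /* base of the frame, in address terms */
  long bytes_above_locals = 32;   /* distance from locals up to incoming SP */
  long locals_address = incoming_sp - bytes_above_locals;
  printf ("locals start at SP%+ld\n", locals_address);  /* prints SP-32 */
  return 0;
}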
_service:tar_scm:0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch
Added
@@ -0,0 +1,119 @@
+From 13dfb01e5c30c3bd09333ac79d6ff96a617fea67 Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Thu, 3 Aug 2023 13:11:12 +0200
+Subject: [PATCH] tree-optimization/110702 - avoid zero-based memory references
+ in IVOPTs
+
+Sometimes IVOPTs chooses a weird induction variable which downstream
+leads to issues. Most of the times we can fend those off during costing
+by rejecting the candidate but it looks like the address description
+costing synthesizes is different from what we end up generating so
+the following fixes things up at code generation time. Specifically
+we avoid the create_mem_ref_raw fallback which uses a literal zero
+address base with the actual base in index2. For the case in question
+we have the address
+
+  type = unsigned long
+  offset = 0
+  elements = {
+    [0] = &e * -3,
+    [1] = (sizetype) a.9_30 * 232,
+    [2] = ivtmp.28_44 * 4
+  }
+
+from which we code generate the problematical
+
+  _3 = MEM[(long int *)0B + ivtmp.36_9 + ivtmp.28_44 * 4];
+
+which references the object at address zero. The patch below
+recognizes the fallback after the fact and transforms the
+TARGET_MEM_REF memory reference into a LEA for which this form
+isn't problematic:
+
+  _24 = &MEM[(long int *)0B + ivtmp.36_34 + ivtmp.28_44 * 4];
+  _3 = *_24;
+
+hereby avoiding the correctness issue. We'd later conclude the
+program terminates at the null pointer dereference and make the
+function pure, miscompling the main function of the testcase.
+
+	PR tree-optimization/110702
+	* tree-ssa-loop-ivopts.cc (rewrite_use_address): When
+	we created a NULL pointer based access rewrite that to
+	a LEA.
+
+	* gcc.dg/torture/pr110702.c: New testcase.
+---
+ gcc/testsuite/gcc.dg/torture/pr110702.c | 31 +++++++++++++++++++++++++
+ gcc/tree-ssa-loop-ivopts.cc             | 17 +++++++++++++-
+ 2 files changed, 47 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/torture/pr110702.c
+
+diff --git a/gcc/testsuite/gcc.dg/torture/pr110702.c b/gcc/testsuite/gcc.dg/torture/pr110702.c
+new file mode 100644
+index 00000000000..aab9c7d923e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr110702.c
+@@ -0,0 +1,31 @@
++/* { dg-do run } */
++
++void abort (void);
++
++int a, b, c, d;
++long e[9][7][4];
++
++void f()
++{
++  for (; a >= 0; a--)
++    {
++      b = 0;
++      for (; b <= 3; b++)
++	{
++	  c = 0;
++	  for (; c <= 3; c++)
++	    {
++	      int *g = &d;
++	      *g = e[0][0][b] | e[a][b][a];
++	    }
++	}
++    }
++}
++
++int main()
++{
++  f();
++  if (a != -1)
++    abort ();
++  return 0;
++}
+diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
+index 92fc1c7d734..934897af691 100644
+--- a/gcc/tree-ssa-loop-ivopts.cc
++++ b/gcc/tree-ssa-loop-ivopts.cc
+@@ -7630,7 +7630,22 @@ rewrite_use_address (struct ivopts_data *data,
+ 				   true, GSI_SAME_STMT);
+     }
+   else
+-    copy_ref_info (ref, *use->op_p);
++    {
++      /* When we end up confused enough and have no suitable base but
++	 stuffed everything to index2 use a LEA for the address and
++	 create a plain MEM_REF to avoid basing a memory reference
++	 on address zero which create_mem_ref_raw does as fallback.  */
++      if (TREE_CODE (ref) == TARGET_MEM_REF
++	  && TMR_INDEX2 (ref) != NULL_TREE
++	  && integer_zerop (TREE_OPERAND (ref, 0)))
++	{
++	  ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
++	  ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
++					  true, GSI_SAME_STMT);
++	  ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
++	}
++      copy_ref_info (ref, *use->op_p);
++    }
+ 
+   *use->op_p = ref;
+ }
+--
+2.45.2
+
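In plain C terms, the shape of the fix can be sketched as follows (a hedged illustration, not the actual GIMPLE): materialize the full address first, then load through an ordinary pointer, so no access is written with a literal zero base.

#include <stdio.h>

/* Before: the access itself was written like *(long *)((char *)0 + iv1 + iv2*4),
   i.e. a memory form based at address zero.  After: the address is built
   first (a LEA), then dereferenced as a plain pointer.  */
long
load_via_lea (char *iv1, unsigned long iv2)
{
  long *p = (long *) (iv1 + iv2 * 4);
  return *p;
}

int
main (void)
{
  long x = 42;
  printf ("%ld\n", load_via_lea ((char *) &x, 0));  /* prints 42 */
  return 0;
}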
_service:tar_scm:0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
Added
@@ -0,0 +1,151 @@ +From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:08 +0100 +Subject: PATCH 092/157 BackportSME aarch64: Rename hard_fp_offset to + bytes_above_hard_fp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6 + +Similarly to the previous locals_offset patch, hard_fp_offset +was described as: + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. This value is always a multiple of + STACK_BOUNDARY. */ + poly_int64 hard_fp_offset; + +which again took an “upside-down” view: higher offsets meant lower +addresses. This patch renames the field to bytes_above_hard_fp instead. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename + to... + (aarch64_frame::bytes_above_hard_fp): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_expand_prologue): Update accordingly. + (aarch64_initial_elimination_offset): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 992f71bbd..67199a026 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void) + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.hard_fp_offset ++ frame.bytes_above_hard_fp + = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ && known_eq (frame.bytes_above_hard_fp, const_size)) + { + /* Simple, small frame with no data below the saved registers. + +@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void) + case that it hardly seems worth the effort though. 
*/ + && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca +- && frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset)) ++ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset)) + { + /* Frame with small area below the saved registers: + +@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void) + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.hard_fp_offset ++ frame.initial_adjust = (frame.bytes_above_hard_fp + + frame.below_hard_fp_saved_regs_size); + frame.final_adjust = frame.bytes_below_saved_regs; + } +- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset) ++ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset) + { + /* Frame with large area below the saved registers, or with SVE saves, + but with a small area above: +@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.callee_adjust = const_fp_offset; ++ frame.callee_adjust = const_above_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = frame.hard_fp_offset; ++ frame.initial_adjust = frame.bytes_above_hard_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void) + { + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); ++ - frame.bytes_above_hard_fp); + gcc_assert (known_ge (chain_offset, 0)); + + if (callee_adjust == 0) +@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return frame.hard_fp_offset; ++ return frame.bytes_above_hard_fp; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.bytes_above_locals; ++ return frame.bytes_above_hard_fp - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2acff9a96..0f7822c3d 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_locals; + +- /* Offset from the base of the frame (incomming SP) to the +- hard_frame_pointer. This value is always a multiple of ++ /* The number of bytes between the hard_frame_pointer and the top of ++ the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 hard_fp_offset; ++ poly_int64 bytes_above_hard_fp; + + /* The size of the frame. This value is the offset from base of the + frame (incomming SP) to the stack_pointer. This value is always +-- +2.33.0 +
_service:tar_scm:0191-LoongArch-Change-OSDIR-for-distribution.patch
Added
@@ -0,0 +1,25 @@
+From 25423cf92026221b7c8798533c40d3e6269a1d7c Mon Sep 17 00:00:00 2001
+From: Peng Fan <fanpeng@loongson.cn>
+Date: Thu, 31 Oct 2024 02:01:49 +0000
+Subject: [PATCH] LoongArch: Change OSDIR for distribution
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/t-linux | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
+index 7cd7cde25..1d1f42596 100644
+--- a/gcc/config/loongarch/t-linux
++++ b/gcc/config/loongarch/t-linux
+@@ -28,4 +28,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
+     MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f)
+     MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s)
+ 
++else
++    MULTILIB_OSDIRNAMES := ../lib64
++
+ endif
+--
+2.45.2
+
_service:tar_scm:0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
Added
@@ -0,0 +1,37 @@
+From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:08 +0100
+Subject: [PATCH 093/157] [Backport][SME] aarch64: Tweak frame_size comment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3
+
+This patch fixes another case in which a value was described with
+an “upside-down” view.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
+---
+ gcc/config/aarch64/aarch64.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 0f7822c3d..39abca051 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   poly_int64 bytes_above_hard_fp;
+ 
+-  /* The size of the frame.  This value is the offset from base of the
+-     frame (incomming SP) to the stack_pointer.  This value is always
++  /* The size of the frame, i.e. the number of bytes between the bottom
++     of the outgoing arguments and the incoming SP.  This value is always
+      a multiple of STACK_BOUNDARY.  */
+   poly_int64 frame_size;
+ 
+--
+2.33.0
+
_service:tar_scm:0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
Added
@@ -0,0 +1,198 @@ +From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:09 +0100 +Subject: PATCH 094/157 BackportSME aarch64: Measure reg_offset from the + bottom of the frame + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54 + +reg_offset was measured from the bottom of the saved register area. +This made perfect sense with the original layout, since the bottom +of the saved register area was also the hard frame pointer address. +It became slightly less obvious with SVE, since we save SVE +registers below the hard frame pointer, but it still made sense. + +However, if we want to allow different frame layouts, it's more +convenient and obvious to measure reg_offset from the bottom of +the frame. After previous patches, it's also a slight simplification +in its own right. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame): Add comment above + reg_offset. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets + from the bottom of the frame, rather than the bottom of the saved + register area. Measure reg_offset from the bottom of the frame + rather than the bottom of the saved register area. + (aarch64_save_callee_saves): Update accordingly. + (aarch64_restore_callee_saves): Likewise. + (aarch64_get_separate_components): Likewise. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 3 ++ + 2 files changed, 27 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 67199a026..df8a83b04 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void) + static void + aarch64_layout_frame (void) + { +- poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); +@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + +- frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ poly_int64 offset = crtl->outgoing_args_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.bytes_below_saved_regs = offset; + + /* Now assign stack slots for the registers. 
Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void) + offset += BYTES_PER_SVE_PRED; + } + +- if (maybe_ne (offset, 0)) ++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; ++ if (maybe_ne (saved_prs_size, 0)) + { + /* If we have any vector registers to save above the predicate registers, + the offset of the vector register save slots need to be a multiple +@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { +- if (known_le (offset, vector_save_size)) +- offset = vector_save_size; +- else if (known_le (offset, vector_save_size * 2)) +- offset = vector_save_size * 2; ++ if (known_le (saved_prs_size, vector_save_size)) ++ offset = frame.bytes_below_saved_regs + vector_save_size; ++ else if (known_le (saved_prs_size, vector_save_size * 2)) ++ offset = frame.bytes_below_saved_regs + vector_save_size * 2; + else + gcc_unreachable (); + } +@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- bool saves_below_hard_fp_p = maybe_ne (offset, 0); +- frame.below_hard_fp_saved_regs_size = offset; +- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; ++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p ++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ frame.bytes_below_hard_fp = offset; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset; ++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; ++ poly_int64 varargs_and_saved_regs_size ++ = frame.saved_regs_size + frame.saved_varargs_size; + + poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size +@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void) + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection + && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, 0)) ++ && known_eq (offset, frame.bytes_below_saved_regs)) + continue; + + /* Get the offset relative to the register we'll use. 
*/ + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + rtx reg = gen_rtx_REG (mode, regno); + poly_int64 offset = frame.reg_offsetregno; + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); + if (frame_pointer_needed) +- offset2 -= frame.below_hard_fp_saved_regs_size; +- else +- offset2 += frame.bytes_below_saved_regs; ++ offset2 -= frame.bytes_below_hard_fp; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p + && known_eq (frame.below_hard_fp_saved_regs_size, 0)) + { +- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; ++ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM ++ - frame.bytes_below_saved_regs); + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 39abca051..f340237d0 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune; + #ifdef HAVE_POLY_INT_H + struct GTY (()) aarch64_frame + { ++ /* The offset from the bottom of the static frame (the bottom of the ++ outgoing arguments) of each register save slot, or -2 if no save is ++ needed. */ + poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; + + /* The number of extra stack bytes taken up by register varargs. +-- +2.33.0 +
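With reg_offset measured from the frame bottom, the SP-relative offset drops one correction term; a sketch with invented numbers (not GCC code):

#include <stdio.h>

int
main (void)
{
  long bytes_below_saved_regs = 64;
  long slot_in_save_area = 16;
  /* New convention: reg_offset already includes the area below the saves.  */
  long reg_offset = bytes_below_saved_regs + slot_in_save_area;
  long bytes_below_sp = 64;

  long offset = reg_offset - bytes_below_sp;  /* no "+ bytes_below_saved_regs" */
  printf ("str reg, [sp, #%ld]\n", offset);   /* same #16 as before the patch */
  return 0;
}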
_service:tar_scm:0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
Added
@@ -0,0 +1,58 @@
+From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:09 +0100
+Subject: [PATCH 095/157] [Backport][SME] aarch64: Simplify top of frame
+ allocation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1
+
+After previous patches, it no longer really makes sense to allocate
+the top of the frame in terms of varargs_and_saved_regs_size and
+saved_regs_and_above.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify
+	the allocation of the top of the frame.
+---
+ gcc/config/aarch64/aarch64.cc | 23 ++++++++---------------
+ 1 file changed, 8 insertions(+), 15 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index df8a83b04..3329aa364 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void)
+ 
+   frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+ 
+-  poly_int64 varargs_and_saved_regs_size
+-    = frame.saved_regs_size + frame.saved_varargs_size;
+-
+-  poly_int64 saved_regs_and_above
+-    = aligned_upper_bound (varargs_and_saved_regs_size
+-			   + get_frame_size (),
+-			   STACK_BOUNDARY / BITS_PER_UNIT);
+-
+-  frame.bytes_above_hard_fp
+-    = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
++  offset += get_frame_size ();
++  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++  auto top_of_locals = offset;
+ 
+-  /* Both these values are already aligned.  */
+-  gcc_assert (multiple_p (frame.bytes_below_saved_regs,
+-			  STACK_BOUNDARY / BITS_PER_UNIT));
+-  frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
++  offset += frame.saved_varargs_size;
++  gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++  frame.frame_size = offset;
+ 
+-  frame.bytes_above_locals = frame.saved_varargs_size;
++  frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
++  frame.bytes_above_locals = frame.frame_size - top_of_locals;
+ 
+   frame.initial_adjust = 0;
+   frame.final_adjust = 0;
+--
+2.33.0
+
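The simplified code is one running offset accumulated from the bottom of the frame; the standalone sketch below (invented sizes, assuming a 16-byte STACK_BOUNDARY / BITS_PER_UNIT) mirrors its shape.

#include <stdio.h>

static long
align_up (long x, long a)
{
  return (x + a - 1) & -a;
}

int
main (void)
{
  long offset = 64;                        /* bytes_below_saved_regs */
  offset += 96;                            /* callee-saved registers */
  long saved_regs_size = offset - 64;
  offset += 40;                            /* get_frame_size (): locals */
  offset = align_up (offset, 16);
  long top_of_locals = offset;
  offset += 16;                            /* saved_varargs_size */
  long frame_size = offset;
  printf ("saved_regs=%ld top_of_locals=%ld frame_size=%ld\n",
          saved_regs_size, top_of_locals, frame_size);
  printf ("bytes_above_locals=%ld\n", frame_size - top_of_locals);
  return 0;
}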
View file
_service:tar_scm:0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
Added
@@ -0,0 +1,41 @@
+From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:10 +0100
+Subject: [PATCH 096/157] [Backport][SME] aarch64: Minor initial adjustment
+ tweak
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c
+
+This patch just changes a calculation of initial_adjust
+to one that makes it slightly more obvious that the total
+adjustment is frame.frame_size.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak
+	calculation of initial_adjust for frames in which all saves
+	are SVE saves.
+---
+ gcc/config/aarch64/aarch64.cc | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 3329aa364..72604dd9d 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void)
+     {
+       /* Frame in which all saves are SVE saves:
+
+-           sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
++           sub sp, sp, frame_size - bytes_below_saved_regs
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+-      frame.initial_adjust = (frame.bytes_above_hard_fp
+-                              + frame.below_hard_fp_saved_regs_size);
++      frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+   else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
+--
+2.33.0
View file
_service:tar_scm:0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
Added
@@ -0,0 +1,128 @@
+From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:10 +0100
+Subject: [PATCH 097/157] [Backport][SME] aarch64: Tweak stack clash boundary
+ condition
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966
+
+The AArch64 ABI says that, when stack clash protection is used,
+there can be a maximum of 1KiB of unprobed space at sp on entry
+to a function.  Therefore, we need to probe when allocating
+>= guard_size - 1KiB of data (>= rather than >).  This is what
+GCC does.
+
+If an allocation is exactly guard_size bytes, it is enough to allocate
+those bytes and probe once at offset 1024.  It isn't possible to use a
+single probe at any other offset: higher would complicate later code,
+by leaving more unprobed space than usual, while lower would risk
+leaving an entire page unprobed.  For simplicity, the code probes all
+allocations at offset 1024.
+
+Some register saves also act as probes.  If we need to allocate
+more space below the last such register save probe, we need to
+probe the allocation if it is > 1KiB.  Again, this allocation is
+then sometimes (but not always) probed at offset 1024.  This sort of
+allocation is currently only used for outgoing arguments, which are
+rarely this big.
+
+However, the code also probed if this final outgoing-arguments
+allocation was == 1KiB, rather than just > 1KiB.  This isn't
+necessary, since the register save then probes at offset 1024
+as required.  Continuing to probe allocations of exactly 1KiB
+would complicate later patches.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
+	Don't probe final allocations that are exactly 1KiB in size (after
+	unprobed space above the final allocation has been deducted).
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-check-prologue-17.c: New test.
+---
+ gcc/config/aarch64/aarch64.cc                 |  4 +-
+ .../aarch64/stack-check-prologue-17.c         | 55 +++++++++++++++++++
+ 2 files changed, 58 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 72604dd9d..ba92a23a7 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+   HOST_WIDE_INT guard_size
+     = 1 << param_stack_clash_protection_guard_size;
+   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
++  HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
++  gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+   HOST_WIDE_INT min_probe_threshold
+     = (final_adjustment_p
+-       ? guard_used_by_caller
++       ? guard_used_by_caller + byte_sp_alignment
+        : guard_size - guard_used_by_caller);
+   /* When doing the final adjustment for the outgoing arguments, take into
+      account any unprobed space there is above the current SP.  There are
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+new file mode 100644
+index 000000000..0d8a25d73
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -0,0 +1,55 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++**	...
++**	str	x30, \[sp\]
++**	sub	sp, sp, #1024
++**	cbnz	w0, .*
++**	bl	g
++**	...
++*/
++int test1(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++    }
++  g();
++  return 1;
++}
++
++/*
++** test2:
++**	...
++**	str	x30, \[sp\]
++**	sub	sp, sp, #1040
++**	str	xzr, \[sp\]
++**	cbnz	w0, .*
++**	bl	g
++**	...
++*/
++int test2(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++	x);
++    }
++  g();
++  return 1;
++}
+--
+2.33.0
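The new test expectations follow directly from the threshold arithmetic. A self-contained check of the numbers, assuming the test's guard-size parameter (2^12) and the 1KiB caller guard named in the patch:

#include <cassert>

int
main ()
{
  const int guard_size = 1 << 12;        // --param ...-guard-size=12
  const int guard_used_by_caller = 1024; // STACK_CLASH_CALLER_GUARD
  const int byte_sp_alignment = 16;      // STACK_BOUNDARY / BITS_PER_UNIT

  // Non-final allocations: probe when size >= guard_size - 1KiB.
  assert (guard_size - guard_used_by_caller == 3072);

  // Final (outgoing-argument) allocation, after this patch: probe only
  // when size > 1KiB, i.e. >= 1040 for 16-byte-aligned sizes.
  const int min_probe_threshold = guard_used_by_caller + byte_sp_alignment;
  assert (1024 < min_probe_threshold);  // test1: sub sp, sp, #1024 - no probe
  assert (1040 >= min_probe_threshold); // test2: sub sp, sp, #1040 - probed
  return 0;
}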
View file
_service:tar_scm:0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
Added
@@ -0,0 +1,409 @@ +From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:11 +0100 +Subject: PATCH 098/157 BackportSME aarch64: Put LR save probe in first + 16 bytes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c + +-fstack-clash-protection uses the save of LR as a probe for the next +allocation. The next allocation could be: + +* another part of the static frame, e.g. when allocating SVE save slots + or outgoing arguments + +* an alloca in the same function + +* an allocation made by a callee function + +However, when -fomit-frame-pointer is used, the LR save slot is placed +above the other GPR save slots. It could therefore be up to 80 bytes +above the base of the GPR save area (which is also the hard fp address). + +aarch64_allocate_and_probe_stack_space took this into account when +deciding how much subsequent space could be allocated without needing +a probe. However, it interacted badly with: + + /* If doing a small final adjustment, we always probe at offset 0. + This is done to avoid issues when LR is not at position 0 or when + the final adjustment is smaller than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +which forces any allocation that is smaller than the guard page size +to be probed at offset 0 rather than the usual offset 1024. It was +therefore possible to construct cases in which we had: + +* a probe using LR at SP + 80 bytes (or some other value >= 16) +* an allocation of the guard page size - 16 bytes +* a probe at SP + 0 + +which allocates guard page size + 64 consecutive unprobed bytes. + +This patch requires the LR probe to be in the first 16 bytes of the +save area when stack clash protection is active. Doing it +unconditionally would cause code-quality regressions. + +Putting LR before other registers prevents push/pop allocation +when shadow call stacks are enabled, since LR is restored +separately from the other callee-saved registers. + +The new comment doesn't say that the probe register is required +to be LR, since a later patch removes that restriction. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that + the LR save slot is in the first 16 bytes of the register save area. + Only form STP/LDP push/pop candidates if both registers are valid. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + when LR was not in the first 16 bytes. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-18.c: New test. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. 
+---
+ gcc/config/aarch64/aarch64.cc                 |  72 ++++++-------
+ .../aarch64/stack-check-prologue-18.c         | 100 ++++++++++++++++++
+ .../aarch64/stack-check-prologue-19.c         | 100 ++++++++++++++++++
+ .../aarch64/stack-check-prologue-20.c         |   3 +
+ 4 files changed, 233 insertions(+), 42 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index ba92a23a7..1ba4c2f89 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void)
+   bool saves_below_hard_fp_p
+     = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+   frame.bytes_below_hard_fp = offset;
++
++  auto allocate_gpr_slot = [&](unsigned int regno)
++    {
++      frame.reg_offset[regno] = offset;
++      if (frame.wb_push_candidate1 == INVALID_REGNUM)
++        frame.wb_push_candidate1 = regno;
++      else if (frame.wb_push_candidate2 == INVALID_REGNUM)
++        frame.wb_push_candidate2 = regno;
++      offset += UNITS_PER_WORD;
++    };
++
+   if (frame.emit_frame_chain)
+     {
+       /* FP and LR are placed in the linkage record.  */
+-      frame.reg_offset[R29_REGNUM] = offset;
+-      frame.wb_push_candidate1 = R29_REGNUM;
+-      frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
+-      frame.wb_push_candidate2 = R30_REGNUM;
+-      offset += 2 * UNITS_PER_WORD;
++      allocate_gpr_slot (R29_REGNUM);
++      allocate_gpr_slot (R30_REGNUM);
+     }
++  else if (flag_stack_clash_protection
++           && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
++    /* Put the LR save slot first, since it makes a good choice of probe
++       for stack clash purposes.  The idea is that the link register usually
++       has to be saved before a call anyway, and so we lose little by
++       stopping it from being individually shrink-wrapped.  */
++    allocate_gpr_slot (R30_REGNUM);
+
+   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+-      {
+-        frame.reg_offset[regno] = offset;
+-        if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-          frame.wb_push_candidate1 = regno;
+-        else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+-          frame.wb_push_candidate2 = regno;
+-        offset += UNITS_PER_WORD;
+-      }
++      allocate_gpr_slot (regno);
+
+   poly_int64 max_int_offset = offset;
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void)
+      max_push_offset to 0, because no registers are popped at this time,
+      so callee_adjust cannot be adjusted.  */
+   HOST_WIDE_INT max_push_offset = 0;
+-  if (frame.wb_pop_candidate2 != INVALID_REGNUM)
+-    max_push_offset = 512;
+-  else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
+-    max_push_offset = 256;
++  if (frame.wb_pop_candidate1 != INVALID_REGNUM)
++    {
++      if (frame.wb_pop_candidate2 != INVALID_REGNUM)
++        max_push_offset = 512;
++      else
++        max_push_offset = 256;
++    }
+
+   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
+   HOST_WIDE_INT const_saved_regs_size;
+@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+     = (final_adjustment_p
+        ? guard_used_by_caller + byte_sp_alignment
+        : guard_size - guard_used_by_caller);
+-  /* When doing the final adjustment for the outgoing arguments, take into
+-     account any unprobed space there is above the current SP.  There are
+-     two cases:
+-
+-     - When saving SVE registers below the hard frame pointer, we force
+-       the lowest save to take place in the prologue before doing the final
+-       adjustment (i.e. we don't allow the save to be shrink-wrapped).
+-       This acts as a probe at SP, so there is no unprobed space.
+-
+-     - When there are no SVE register saves, we use the store of the link
+-       register as a probe.  We can't assume that LR was saved at position 0
+-       though, so treat any space below it as unprobed.  */
+-  if (final_adjustment_p
+-      && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+-    {
+-      poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
+-                              - frame.bytes_below_saved_regs);
+-      if (known_ge (lr_offset, 0))
+-        min_probe_threshold -= lr_offset.to_constant ();
+-      else
+-        gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
+-    }
+-
+   poly_int64 frame_size = frame.frame_size;
+
+   /* We should always have a positive probe threshold.  */
+@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+   if (final_adjustment_p && rounded_size != 0)
+     min_probe_threshold = 0;
+   /* If doing a small final adjustment, we always probe at offset 0.
+-     This is done to avoid issues when LR is not at position 0 or when
+-     the final adjustment is smaller than the probing offset.  */
++     This is done to avoid issues when the final adjustment is smaller
++     than the probing offset.  */
+   else if (final_adjustment_p && rounded_size == 0)
+     residual_probe_offset = 0;
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+new file mode 100644
+index 000000000..82447d20f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -0,0 +1,100 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
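The unprobed-gap scenario in the commit message can be checked numerically; this small program assumes the 4KiB guard page used in the description:

#include <cassert>

int
main ()
{
  const int guard_size = 4096;            // assumed guard page size
  const int lr_probe_offset = 80;         // LR slot up to 80 bytes above the
                                          // bottom of the GPR save area
  const int allocation = guard_size - 16; // small enough to probe at offset 0
  // Distance between the offset-0 probe and the earlier LR-save probe:
  // this is the run of consecutive unprobed bytes the patch eliminates.
  assert (allocation + lr_probe_offset == guard_size + 64);
  return 0;
}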
View file
_service:tar_scm:0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
Added
@@ -0,0 +1,126 @@
+From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:11 +0100
+Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final
+ frame allocation
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae
+
+Previous patches ensured that the final frame allocation only needs
+a probe when the size is strictly greater than 1KiB.  It's therefore
+safe to use the normal 1024 probe offset in all cases.
+
+The main motivation for doing this is to simplify the code and
+remove the number of special cases.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
+	Always probe the residual allocation at offset 1024, asserting
+	that that is in range.
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
+	to be at offset 1024 rather than offset 0.
+	* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
+	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                    | 12 ++++--------
+ .../gcc.target/aarch64/stack-check-prologue-17.c |  2 +-
+ .../gcc.target/aarch64/stack-check-prologue-18.c |  4 ++--
+ .../gcc.target/aarch64/stack-check-prologue-19.c |  4 ++--
+ 4 files changed, 9 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1ba4c2f89..6d835dc8f 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+      are still safe.  */
+   if (residual)
+     {
+-      HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
++      gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
++
+       /* If we're doing final adjustments, and we've done any full page
+          allocations then any residual needs to be probed.  */
+       if (final_adjustment_p && rounded_size != 0)
+         min_probe_threshold = 0;
+-      /* If doing a small final adjustment, we always probe at offset 0.
+-         This is done to avoid issues when the final adjustment is smaller
+-         than the probing offset.  */
+-      else if (final_adjustment_p && rounded_size == 0)
+-        residual_probe_offset = 0;
+
+       aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
+       if (residual >= min_probe_threshold)
+@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+                     HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
+                     "\n", residual);
+
+-          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+-                                           residual_probe_offset));
++          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                           guard_used_by_caller));
+           emit_insn (gen_blockage ());
+         }
+     }
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+index 0d8a25d73..f0ec13897 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -33,7 +33,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+index 82447d20f..6383bec5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -9,7 +9,7 @@ void g();
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #4064
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+index 73ac3e4e4..562039b5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+@@ -9,7 +9,7 @@ void g();
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #4064
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+@@ -50,7 +50,7 @@ int test1(int z) {
+ **	...
+ **	str	x30, \[sp\]
+ **	sub	sp, sp, #1040
+-**	str	xzr, \[sp\]
++**	str	xzr, \[sp, #?1024\]
+ **	cbnz	w0, .*
+ **	bl	g
+ **	...
+--
+2.33.0
View file
_service:tar_scm:0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
Added
@@ -0,0 +1,280 @@ +From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:12 +0100 +Subject: PATCH 100/157 BackportSME aarch64: Explicitly record probe + registers in frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f + +The stack frame is currently divided into three areas: + +A: the area above the hard frame pointer +B: the SVE saves below the hard frame pointer +C: the outgoing arguments + +If the stack frame is allocated in one chunk, the allocation needs a +probe if the frame size is >= guard_size - 1KiB. In addition, if the +function is not a leaf function, it must probe an address no more than +1KiB above the outgoing SP. We ensured the second condition by + +(1) using single-chunk allocations for non-leaf functions only if + the link register save slot is within 512 bytes of the bottom + of the frame; and + +(2) using the link register save as a probe (meaning, for instance, + that it can't be individually shrink wrapped) + +If instead the stack is allocated in multiple chunks, then: + +* an allocation involving only the outgoing arguments (C above) requires + a probe if the allocation size is > 1KiB + +* any other allocation requires a probe if the allocation size + is >= guard_size - 1KiB + +* second and subsequent allocations require the previous allocation + to probe at the bottom of the allocated area, regardless of the size + of that previous allocation + +The final point means that, unlike for single allocations, +it can be necessary to have both a non-SVE register probe and +an SVE register probe. For example: + +* allocate A, probe using a non-SVE register save +* allocate B, probe using an SVE register save +* allocate C + +The non-SVE register used in this case was again the link register. +It was previously used even if the link register save slot was some +bytes above the bottom of the non-SVE register saves, but an earlier +patch avoided that by putting the link register save slot first. + +As a belt-and-braces fix, this patch explicitly records which +probe registers we're using and allows the non-SVE probe to be +whichever register comes first (as for SVE). + +The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) + (aarch64_frame::hard_fp_save_and_probe): New fields. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. + Rather than asserting that a leaf function saves LR, instead assert + that a leaf function saves something. + (aarch64_get_separate_components): Prevent the chosen probe + registers from being individually shrink-wrapped. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + probe registers that aren't at the bottom of the previous allocation. + +gcc/testsuite/ + * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. 
+---
+ gcc/config/aarch64/aarch64.cc                 | 68 +++++++++++++++----
+ gcc/config/aarch64/aarch64.h                  |  8 +++
+ .../aarch64/sve/pcs/stack_clash_3.c           |  6 +-
+ 3 files changed, 64 insertions(+), 18 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 6d835dc8f..dd80ceba8 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void)
+        && !crtl->abi->clobbers_full_reg_p (regno))
+       frame.reg_offset[regno] = SLOT_REQUIRED;
+
+-  /* With stack-clash, LR must be saved in non-leaf functions.  The saving of
+-     LR counts as an implicit probe which allows us to maintain the invariant
+-     described in the comment at expand_prologue.  */
+-  gcc_assert (crtl->is_leaf
+-              || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+
+   poly_int64 offset = crtl->outgoing_args_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+   frame.bytes_below_saved_regs = offset;
++  frame.sve_save_and_probe = INVALID_REGNUM;
+
+   /* Now assign stack slots for the registers.  Start with the predicate
+      registers, since predicate LDR and STR have a relatively small
+@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void)
+   for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+       {
++        if (frame.sve_save_and_probe == INVALID_REGNUM)
++          frame.sve_save_and_probe = regno;
+         frame.reg_offset[regno] = offset;
+         offset += BYTES_PER_SVE_PRED;
+       }
+@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          if (frame.sve_save_and_probe == INVALID_REGNUM)
++            frame.sve_save_and_probe = regno;
+           frame.reg_offset[regno] = offset;
+           offset += vector_save_size;
+         }
+@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void)
+   frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+   bool saves_below_hard_fp_p
+     = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++  gcc_assert (!saves_below_hard_fp_p
++              || (frame.sve_save_and_probe != INVALID_REGNUM
++                  && known_eq (frame.reg_offset[frame.sve_save_and_probe],
++                               frame.bytes_below_saved_regs)));
++
+   frame.bytes_below_hard_fp = offset;
++  frame.hard_fp_save_and_probe = INVALID_REGNUM;
+
+   auto allocate_gpr_slot = [&](unsigned int regno)
+     {
++      if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
++        frame.hard_fp_save_and_probe = regno;
+       frame.reg_offset[regno] = offset;
+       if (frame.wb_push_candidate1 == INVALID_REGNUM)
+         frame.wb_push_candidate1 = regno;
+@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
++            frame.hard_fp_save_and_probe = regno;
+           /* If there is an alignment gap between integer and fp callee-saves,
+              allocate the last fp register to it if possible.  */
+           if (regno == last_fp_reg
+@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void)
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+   frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
++  gcc_assert (known_eq (frame.saved_regs_size,
++                        frame.below_hard_fp_saved_regs_size)
++              || (frame.hard_fp_save_and_probe != INVALID_REGNUM
++                  && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
++                               frame.bytes_below_hard_fp)));
++
++  /* With stack-clash, a register must be saved in non-leaf functions.
++     The saving of the bottommost register counts as an implicit probe,
++     which allows us to maintain the invariant described in the comment
++     at expand_prologue.  */
++  gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
+
+   offset += get_frame_size ();
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void)
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+
++  /* The frame is allocated in pieces, with each non-final piece
++     including a register save at offset 0 that acts as a probe for
++     the following piece.  In addition, the save of the bottommost register
++     acts as a probe for callees and allocas.  Roll back any probes that
++     aren't needed.
++
++     A probe isn't needed if it is associated with the final allocation
++     (including callees and allocas) that happens before the epilogue is
++     executed.  */
++  if (crtl->is_leaf
++      && !cfun->calls_alloca
++      && known_eq (frame.final_adjust, 0))
++    {
++      if (maybe_ne (frame.sve_callee_adjust, 0))
++        frame.sve_save_and_probe = INVALID_REGNUM;
++      else
++        frame.hard_fp_save_and_probe = INVALID_REGNUM;
++    }
++
+   /* Make sure the individual adjustments add up to the full frame size.  */
+   gcc_assert (known_eq (frame.initial_adjust
+                         + frame.callee_adjust
+@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void)
+
+       poly_int64 offset = frame.reg_offset[regno];
+
+-      /* If the register is saved in the first SVE save slot, we use
+-         it as a stack probe for -fstack-clash-protection.  */
+-      if (flag_stack_clash_protection
+-          && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+-          && known_eq (offset, frame.bytes_below_saved_regs))
+-        continue;
+-
+       /* Get the offset relative to the register we'll use.  */
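A condensed model of the new bookkeeping (the data structures here are assumptions, not GCC's): the first register saved in each area is remembered, so its save can double as the probe for that area and is then excluded from shrink-wrapping.

#include <vector>

const unsigned INVALID_REGNUM = ~0u;

struct probes
{
  unsigned sve_save_and_probe;      // probes the below-hard-FP area
  unsigned hard_fp_save_and_probe;  // probes the GPR/FPR save area
};

// SVE_SAVES and GPR_FPR_SAVES list the saved registers in offset order,
// so front() is the bottommost save in each area.
probes
record_probes (const std::vector<unsigned> &sve_saves,
               const std::vector<unsigned> &gpr_fpr_saves)
{
  probes p = { INVALID_REGNUM, INVALID_REGNUM };
  if (!sve_saves.empty ())
    p.sve_save_and_probe = sve_saves.front ();
  if (!gpr_fpr_saves.empty ())
    p.hard_fp_save_and_probe = gpr_fpr_saves.front ();
  return p;
}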
View file
_service:tar_scm:0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
Added
@@ -0,0 +1,160 @@
+From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:12 +0100
+Subject: [PATCH 101/157] [Backport][SME] aarch64: Remove
+ below_hard_fp_saved_regs_size
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1
+
+After previous patches, it's no longer necessary to store
+saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
+All measurements instead use the top or bottom of the frame as
+reference points.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
+	(aarch64_frame::below_hard_fp_saved_regs_size): Delete.
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly.
+---
+ gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++-------------------
+ gcc/config/aarch64/aarch64.h  |  7 ------
+ 2 files changed, 21 insertions(+), 31 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index dd80ceba8..0894ed325 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void)
+
+   /* OFFSET is now the offset of the hard frame pointer from the bottom
+      of the callee save area.  */
+-  frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  bool saves_below_hard_fp_p
+-    = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
++  auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
++  bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
+   gcc_assert (!saves_below_hard_fp_p
+               || (frame.sve_save_and_probe != INVALID_REGNUM
+                   && known_eq (frame.reg_offset[frame.sve_save_and_probe],
+@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void)
+
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+
+-  frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  gcc_assert (known_eq (frame.saved_regs_size,
+-                        frame.below_hard_fp_saved_regs_size)
++  auto saved_regs_size = offset - frame.bytes_below_saved_regs;
++  gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
+               || (frame.hard_fp_save_and_probe != INVALID_REGNUM
+                   && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
+                                frame.bytes_below_hard_fp)));
+@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void)
+      The saving of the bottommost register counts as an implicit probe,
+      which allows us to maintain the invariant described in the comment
+      at expand_prologue.  */
+-  gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
++  gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
+
+   offset += get_frame_size ();
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void)
+
+   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
+   HOST_WIDE_INT const_saved_regs_size;
+-  if (known_eq (frame.saved_regs_size, 0))
++  if (known_eq (saved_regs_size, 0))
+     frame.initial_adjust = frame.frame_size;
+   else if (frame.frame_size.is_constant (&const_size)
+            && const_size < max_push_offset
+@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void)
+       frame.callee_adjust = const_size;
+     }
+   else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
+-           && frame.saved_regs_size.is_constant (&const_saved_regs_size)
++           && saved_regs_size.is_constant (&const_saved_regs_size)
+            && const_below_saved_regs + const_saved_regs_size < 512
+            /* We could handle this case even with data below the saved
+               registers, provided that that data left us with valid offsets
+@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void)
+       frame.initial_adjust = frame.frame_size;
+     }
+   else if (saves_below_hard_fp_p
+-           && known_eq (frame.saved_regs_size,
+-                        frame.below_hard_fp_saved_regs_size))
++           && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
+     {
+       /* Frame in which all saves are SVE saves:
+
+@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void)
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+       frame.callee_adjust = const_above_fp;
+-      frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
++      frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+   else
+@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void)
+            save SVE registers relative to SP
+            sub sp, sp, bytes_below_saved_regs  */
+       frame.initial_adjust = frame.bytes_above_hard_fp;
+-      frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
++      frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
+       frame.final_adjust = frame.bytes_below_saved_regs;
+     }
+
+@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno)
+        |  local variables              | <-- frame_pointer_rtx
+        |                               |
+        +-------------------------------+
+-       |  padding                      | \
+-       +-------------------------------+  |
+-       |  callee-saved registers       |  | frame.saved_regs_size
+-       +-------------------------------+  |
+-       |  LR'                          |  |
+-       +-------------------------------+  |
+-       |  FP'                          |  |
+-       +-------------------------------+  |<- hard_frame_pointer_rtx (aligned)
+-       |  SVE vector registers         |  | \
+-       +-------------------------------+  | | below_hard_fp_saved_regs_size
+-       |  SVE predicate registers      | /  /
++       |  padding                      |
++       +-------------------------------+
++       |  callee-saved registers       |
++       +-------------------------------+
++       |  LR'                          |
++       +-------------------------------+
++       |  FP'                          |
++       +-------------------------------+ <-- hard_frame_pointer_rtx (aligned)
++       |  SVE vector registers         |
++       +-------------------------------+
++       |  SVE predicate registers      |
+        +-------------------------------+
+        |  dynamic allocation           |
+        +-------------------------------+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index af480d9e8..292ef2eec 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   HOST_WIDE_INT saved_varargs_size;
+
+-  /* The size of the callee-save registers with a slot in REG_OFFSET.  */
+-  poly_int64 saved_regs_size;
+-
+   /* The number of bytes between the bottom of the static frame (the bottom
+      of the outgoing arguments) and the bottom of the register save area.
+      This value is always a multiple of STACK_BOUNDARY.  */
+   poly_int64 bytes_below_saved_regs;
+
+-  /* The size of the callee-save registers with a slot in REG_OFFSET that
+-     are saved below the hard frame pointer.  */
+-  poly_int64 below_hard_fp_saved_regs_size;
+-
+   /* The number of bytes between the bottom of the static frame (the bottom
+      of the outgoing arguments) and the hard frame pointer.  This value is
+      always a multiple of STACK_BOUNDARY.  */
+--
+2.33.0
View file
_service:tar_scm:0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
Added
@@ -0,0 +1,301 @@
+From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:13 +0100
+Subject: [PATCH 102/157] [Backport][SME] aarch64: Make stack smash canary
+ protect saved registers
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf
+
+AArch64 normally puts the saved registers near the bottom of the frame,
+immediately above any dynamic allocations.  But this means that a
+stack-smash attack on those dynamic allocations could overwrite the
+saved registers without needing to reach as far as the stack smash
+canary.
+
+The same thing could also happen for variable-sized arguments that are
+passed by value, since those are allocated before a call and popped on
+return.
+
+This patch avoids that by putting the locals (and thus the canary) below
+the saved registers when stack smash protection is active.
+
+The patch fixes CVE-2023-4039.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p):
+	New function.
+	(aarch64_layout_frame): Use it to decide whether locals should
+	go above or below the saved registers.
+	(aarch64_expand_prologue): Update stack layout comment.
+	Emit a stack tie after the final adjustment.
+
+gcc/testsuite/
+	* gcc.target/aarch64/stack-protector-8.c: New test.
+	* gcc.target/aarch64/stack-protector-9.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                 | 46 +++++++--
+ .../gcc.target/aarch64/stack-protector-8.c    | 95 +++++++++++++++++++
+ .../gcc.target/aarch64/stack-protector-9.c    | 33 +++++++
+ 3 files changed, 168 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 0894ed325..8d4dd2891 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void)
+   return aarch64_use_frame_pointer;
+ }
+
++/* Return true if the current function should save registers above
++   the locals area, rather than below it.  */
++
++static bool
++aarch64_save_regs_above_locals_p ()
++{
++  /* When using stack smash protection, make sure that the canary slot
++     comes between the locals and the saved registers.  Otherwise,
++     it would be possible for a carefully sized smash attack to change
++     the saved registers (particularly LR and FP) without reaching the
++     canary.  */
++  return crtl->stack_protect_guard;
++}
++
+ /* Mark the registers that need to be saved by the callee and calculate
+    the size of the callee-saved registers area and frame record (both FP
+    and LR may be omitted).  */
+@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void)
+   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+   bool frame_related_fp_reg_p = false;
+   aarch64_frame &frame = cfun->machine->frame;
++  poly_int64 top_of_locals = -1;
+
+   frame.emit_frame_chain = aarch64_needs_frame_chain ();
+
+@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void)
+        && !crtl->abi->clobbers_full_reg_p (regno))
+       frame.reg_offset[regno] = SLOT_REQUIRED;
+
++  bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
+
+   poly_int64 offset = crtl->outgoing_args_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
++  if (regs_at_top_p)
++    {
++      offset += get_frame_size ();
++      offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++      top_of_locals = offset;
++    }
+   frame.bytes_below_saved_regs = offset;
+   frame.sve_save_and_probe = INVALID_REGNUM;
+
+@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void)
+      at expand_prologue.  */
+   gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
+
+-  offset += get_frame_size ();
+-  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+-  auto top_of_locals = offset;
+-
++  if (!regs_at_top_p)
++    {
++      offset += get_frame_size ();
++      offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
++      top_of_locals = offset;
++    }
+   offset += frame.saved_varargs_size;
+   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+   frame.frame_size = offset;
+
+   frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
++  gcc_assert (known_ge (top_of_locals, 0));
+   frame.bytes_above_locals = frame.frame_size - top_of_locals;
+
+   frame.initial_adjust = 0;
+@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno)
+        |  for register varargs         |
+        |                               |
+        +-------------------------------+
+-       |  local variables              | <-- frame_pointer_rtx
++       |  local variables (1)          | <-- frame_pointer_rtx
+        |                               |
+        +-------------------------------+
+-       |  padding                      |
++       |  padding (1)                  |
+        +-------------------------------+
+        |  callee-saved registers       |
+        +-------------------------------+
+@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno)
+        +-------------------------------+
+        |  SVE predicate registers      |
+        +-------------------------------+
++       |  local variables (2)          |
++       +-------------------------------+
++       |  padding (2)                  |
++       +-------------------------------+
+        |  dynamic allocation           |
+        +-------------------------------+
+        |  padding                      |
+@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno)
+        +-------------------------------+
+        |                               | <-- stack_pointer_rtx (aligned)
+
++   The regions marked (1) and (2) are mutually exclusive.  (2) is used
++   when aarch64_save_regs_above_locals_p is true.
++
+    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
+    but leave frame_pointer_rtx and hard_frame_pointer_rtx
+    unchanged.
+@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void)
+   gcc_assert (known_eq (bytes_below_sp, final_adjust));
+   aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
+                                           !frame_pointer_needed, true);
++  if (emit_frame_chain && maybe_ne (final_adjust, 0))
++    aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+ }
+
+ /* Return TRUE if we can use a simple_return insn.  */
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+new file mode 100644
+index 000000000..e71d820e3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+@@ -0,0 +1,95 @@
++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void g(void *);
++__SVBool_t *h(void *);
++
++/*
++** test1:
++**	sub	sp, sp, #288
++**	stp	x29, x30, \[sp, #?272\]
++**	add	x29, sp, #?272
++**	mrs	(x[0-9]+), tpidr2_el0
++**	ldr	(x[0-9]+), \[\1, #?16\]
++**	str	\2, \[sp, #?264\]
++**	mov	\2, #?0
++**	add	x0, sp, #?8
++**	bl	g
++**	...
++**	mrs	.*
++**	...
++**	bne	.*
++**	...
++**	ldp	x29, x30, \[sp, #?272\]
++**	add	sp, sp, #?288
++**	ret
++**	bl	__stack_chk_fail
++*/
++int test1() {
++  int y[0x40];
++  g(y);
++  return 1;
++}
++
++/*
++** test2:
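The fix is easiest to see as a layout rule. A minimal sketch (illustrative only; the predicate mirrors the patch, the layout columns are a simplification of the diagram above):

// Layouts, top of frame to bottom:
//
//   protector off                   protector on
//   --------------------------      --------------------------
//   locals                          saved regs (FP, LR, ...)
//   saved regs (FP, LR, ...)        locals, canary at the top
//   dynamic allocation              dynamic allocation
//   outgoing args                   outgoing args
//
// With the canary between the locals and the saved registers, an overflow
// that walks upwards through the locals or the dynamic area must corrupt
// the canary before it can reach FP or LR.
bool
save_regs_above_locals_p (bool stack_protect_guard_active)
{
  return stack_protect_guard_active;
}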
View file
_service:tar_scm:0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
Added
@@ -0,0 +1,201 @@
+From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:45:43 +0100
+Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
+
+The prologue/epilogue pass allows the prologue sequence to contain
+jumps.  The sequence is then partitioned into basic blocks using
+find_many_sub_basic_blocks.
+
+This patch treats epilogues in a similar way.  Since only one block
+might need to be split, the patch (re)introduces a find_sub_basic_blocks
+routine to handle a single block.
+
+The new routine hard-codes the assumption that split_block will chain
+the new block immediately after the original block.  The routine doesn't
+try to replicate the fix for PR81030, since that was specific to
+gimple->rtl expansion.
+
+The patch is needed for follow-on aarch64 patches that add conditional
+code to the epilogue.  The tests are part of those patches.
+
+gcc/
+	* cfgbuild.h (find_sub_basic_blocks): Declare.
+	* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
+	split out from...
+	(find_many_sub_basic_blocks): ...here.
+	(find_sub_basic_blocks): New function.
+	* function.cc (thread_prologue_and_epilogue_insns): Handle
+	epilogues that contain jumps.
+---
+ gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
+ gcc/cfgbuild.h  |  1 +
+ gcc/function.cc |  4 +++
+ 3 files changed, 70 insertions(+), 30 deletions(-)
+
+diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
+index 646a06614..58b865f29 100644
+--- a/gcc/cfgbuild.cc
++++ b/gcc/cfgbuild.cc
+@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
+     }
+ }
+
++/* Update the profile information for BB, which was created by splitting
++   an RTL block that had a non-final jump.  */
++
++static void
++update_profile_for_new_sub_basic_block (basic_block bb)
++{
++  edge e;
++  edge_iterator ei;
++
++  bool initialized_src = false, uninitialized_src = false;
++  bb->count = profile_count::zero ();
++  FOR_EACH_EDGE (e, ei, bb->preds)
++    {
++      if (e->count ().initialized_p ())
++        {
++          bb->count += e->count ();
++          initialized_src = true;
++        }
++      else
++        uninitialized_src = true;
++    }
++  /* When some edges are missing with read profile, this is
++     most likely because RTL expansion introduced loop.
++     When profile is guessed we may have BB that is reachable
++     from unlikely path as well as from normal path.
++
++     TODO: We should handle loops created during BB expansion
++     correctly here.  For now we assume all those loop to cycle
++     precisely once.  */
++  if (!initialized_src
++      || (uninitialized_src
++          && profile_status_for_fn (cfun) < PROFILE_GUESSED))
++    bb->count = profile_count::uninitialized ();
++
++  compute_outgoing_frequencies (bb);
++}
++
+ /* Assume that some pass has inserted labels or control flow
+    instructions within a basic block.  Split basic blocks as needed
+    and create edges.  */
+@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
+   if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+     FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
+       {
+-        edge e;
+-        edge_iterator ei;
+-
+         if (STATE (bb) == BLOCK_ORIGINAL)
+           continue;
+         if (STATE (bb) == BLOCK_NEW)
+           {
+-            bool initialized_src = false, uninitialized_src = false;
+-            bb->count = profile_count::zero ();
+-            FOR_EACH_EDGE (e, ei, bb->preds)
+-              {
+-                if (e->count ().initialized_p ())
+-                  {
+-                    bb->count += e->count ();
+-                    initialized_src = true;
+-                  }
+-                else
+-                  uninitialized_src = true;
+-              }
+-            /* When some edges are missing with read profile, this is
+-               most likely because RTL expansion introduced loop.
+-               When profile is guessed we may have BB that is reachable
+-               from unlikely path as well as from normal path.
+-
+-               TODO: We should handle loops created during BB expansion
+-               correctly here.  For now we assume all those loop to cycle
+-               precisely once.  */
+-            if (!initialized_src
+-                || (uninitialized_src
+-                    && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+-              bb->count = profile_count::uninitialized ();
++            update_profile_for_new_sub_basic_block (bb);
++            continue;
+           }
+-        /* If nothing changed, there is no need to create new BBs.  */
+-        else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
++        /* If nothing changed, there is no need to create new BBs.  */
++        if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+           {
+             /* In rare occassions RTL expansion might have mistakely assigned
+                a probabilities different from what is in CFG.  This happens
+@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
+             update_br_prob_note (bb);
+             continue;
+           }
+-
+         compute_outgoing_frequencies (bb);
+       }
+
+   FOR_EACH_BB_FN (bb, cfun)
+     SET_STATE (bb, 0);
+ }
++
++/* Like find_many_sub_basic_blocks, but look only within BB.  */
++
++void
++find_sub_basic_blocks (basic_block bb)
++{
++  basic_block end_bb = bb->next_bb;
++  find_bb_boundaries (bb);
++  if (bb->next_bb == end_bb)
++    return;
++
++  /* Re-scan and wire in all edges.  This expects simple (conditional)
++     jumps at the end of each new basic blocks.  */
++  make_edges (bb, end_bb->prev_bb, 1);
++
++  /* Update branch probabilities.  Expect only (un)conditional jumps
++     to be created with only the forward edges.  */
++  if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
++    {
++      compute_outgoing_frequencies (bb);
++      for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
++        update_profile_for_new_sub_basic_block (bb);
++    }
++}
+diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
+index 85145da7f..53543bb75 100644
+--- a/gcc/cfgbuild.h
++++ b/gcc/cfgbuild.h
+@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
+ extern bool control_flow_insn_p (const rtx_insn *);
+ extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
+ extern void find_many_sub_basic_blocks (sbitmap);
++extern void find_sub_basic_blocks (basic_block);
+
+ #endif /* GCC_CFGBUILD_H */
+diff --git a/gcc/function.cc b/gcc/function.cc
+index ddab43ca4..f4fc211a0 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
+               && returnjump_p (BB_END (e->src)))
+             e->flags &= ~EDGE_FALLTHRU;
+         }
++
++      find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
+     }
+   else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
+     {
+@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
+          set_insn_locations (seq, epilogue_location);
+
+          emit_insn_before (seq, insn);
++
++          find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
+        }
+     }
+
+--
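The counting rule that update_profile_for_new_sub_basic_block factors out can be modelled in isolation. A self-contained sketch, with std::optional standing in for profile_count ("no value" playing the role of an uninitialized count):

#include <optional>
#include <vector>

// Sum the incoming edge counts of a split-off block. The result is
// unknown if no edge had a count, or if some edge lacked one while the
// function had no profile at all (mirroring
// profile_status_for_fn (cfun) < PROFILE_GUESSED).
std::optional<long>
merge_edge_counts (const std::vector<std::optional<long>> &pred_counts,
                   bool profile_absent)
{
  long sum = 0;
  bool initialized_src = false, uninitialized_src = false;
  for (const auto &c : pred_counts)
    {
      if (c.has_value ())
        {
          sum += *c;            // bb->count += e->count ()
          initialized_src = true;
        }
      else
        uninitialized_src = true;
    }
  if (!initialized_src || (uninitialized_src && profile_absent))
    return std::nullopt;        // profile_count::uninitialized ()
  return sum;
}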
View file
_service:tar_scm:0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
Added
@@ -0,0 +1,709 @@
+From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:46:33 +0100
+Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register
+ save order
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0
+
+aarch64_save/restore_callee_saves looped over registers in register
+number order.  This in turn meant that we could only use LDP and STP
+for registers that were consecutive both number-wise and
+offset-wise (after unsaved registers are excluded).
+
+This patch instead builds lists of the registers that we've decided to
+save, in offset order.  We can then form LDP/STP pairs regardless of
+register number order, which in turn means that we can put the LR save
+slot first without losing LDP/STP opportunities.
+
+gcc/
+	* config/aarch64/aarch64.h (aarch64_frame): Add vectors that
+	store the list saved GPRs, FPRs and predicate registers.
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize
+	the lists of saved registers.  Use them to choose push candidates.
+	Invalidate pop candidates if we're not going to do a pop.
+	(aarch64_next_callee_save): Delete.
+	(aarch64_save_callee_saves): Take a list of registers,
+	rather than a range.  Make !skip_wb select only write-back
+	candidates.
+	(aarch64_expand_prologue): Update calls accordingly.
+	(aarch64_restore_callee_saves): Take a list of registers,
+	rather than a range.  Always skip pop candidates.  Also skip
+	LR if shadow call stacks are enabled.
+	(aarch64_expand_epilogue): Update calls accordingly.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores
+	to happen in offset order.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise.
+	* gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                 | 203 +++++++++---------
+ gcc/config/aarch64/aarch64.h                  |   9 +-
+ .../aarch64/sve/pcs/stack_clash_2.c           |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_1024.c      |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_128.c       |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_2048.c      |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_256.c       |   6 +-
+ .../aarch64/sve/pcs/stack_clash_2_512.c       |   6 +-
+ 8 files changed, 128 insertions(+), 120 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 8d4dd2891..e10c9d763 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p ()
+ static void
+ aarch64_layout_frame (void)
+ {
+-  int regno, last_fp_reg = INVALID_REGNUM;
++  unsigned regno, last_fp_reg = INVALID_REGNUM;
+   machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
+   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
+   bool frame_related_fp_reg_p = false;
+   aarch64_frame &frame = cfun->machine->frame;
+   poly_int64 top_of_locals = -1;
+
++  vec_safe_truncate (frame.saved_gprs, 0);
++  vec_safe_truncate (frame.saved_fprs, 0);
++  vec_safe_truncate (frame.saved_prs, 0);
++
+   frame.emit_frame_chain = aarch64_needs_frame_chain ();
+
+   /* Adjust the outgoing arguments size if required.  Keep it in sync with what
+@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void)
+   for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
+     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+       {
++        vec_safe_push (frame.saved_prs, regno);
+         if (frame.sve_save_and_probe == INVALID_REGNUM)
+           frame.sve_save_and_probe = regno;
+         frame.reg_offset[regno] = offset;
+@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void)
+      If we don't have any vector registers to save, and we know how
+      big the predicate save area is, we can just round it up to the
+      next 16-byte boundary.  */
+-  if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
++  if (last_fp_reg == INVALID_REGNUM && offset.is_constant ())
+     offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+   else
+     {
+@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void)
+     }
+
+   /* If we need to save any SVE vector registers, add them next.  */
+-  if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
++  if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
++          vec_safe_push (frame.saved_fprs, regno);
+           if (frame.sve_save_and_probe == INVALID_REGNUM)
+             frame.sve_save_and_probe = regno;
+           frame.reg_offset[regno] = offset;
+@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void)
+
+   auto allocate_gpr_slot = [&](unsigned int regno)
+     {
+-      if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
+-        frame.hard_fp_save_and_probe = regno;
++      vec_safe_push (frame.saved_gprs, regno);
+       frame.reg_offset[regno] = offset;
+-      if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-        frame.wb_push_candidate1 = regno;
+-      else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+-        frame.wb_push_candidate2 = regno;
+       offset += UNITS_PER_WORD;
+     };
+
+@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void)
+     for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+       if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
+         {
+-          if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
+-            frame.hard_fp_save_and_probe = regno;
++          vec_safe_push (frame.saved_fprs, regno);
+           /* If there is an alignment gap between integer and fp callee-saves,
+              allocate the last fp register to it if possible.  */
+           if (regno == last_fp_reg
+@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void)
+         }
+
+       frame.reg_offset[regno] = offset;
+-      if (frame.wb_push_candidate1 == INVALID_REGNUM)
+-        frame.wb_push_candidate1 = regno;
+-      else if (frame.wb_push_candidate2 == INVALID_REGNUM
+-               && frame.wb_push_candidate1 >= V0_REGNUM)
+-        frame.wb_push_candidate2 = regno;
+       offset += vector_save_size;
+     }
+
+   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+-
+   auto saved_regs_size = offset - frame.bytes_below_saved_regs;
+-  gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
+-              || (frame.hard_fp_save_and_probe != INVALID_REGNUM
+-                  && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
+-                               frame.bytes_below_hard_fp)));
++
++  array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs)
++                                         ? frame.saved_gprs
++                                         : frame.saved_fprs);
++  if (!push_regs.empty ()
++      && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))
++    {
++      frame.hard_fp_save_and_probe = push_regs[0];
++      frame.wb_push_candidate1 = push_regs[0];
++      if (push_regs.size () > 1)
++        frame.wb_push_candidate2 = push_regs[1];
++    }
++  else
++    gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size));
+
+   /* With stack-clash, a register must be saved in non-leaf functions.
+      The saving of the bottommost register counts as an implicit probe,
+@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void)
+                         + frame.sve_callee_adjust
+                         + frame.final_adjust, frame.frame_size));
+
+-  if (!frame.emit_frame_chain && frame.callee_adjust == 0)
++  if (frame.callee_adjust == 0)
+     {
+-      /* We've decided not to associate any register saves with the initial
+-         stack allocation.  */
+-      frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM;
+-      frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM;
++      /* We've decided not to do a "real" push and pop.  However,
++         setting up the frame chain is treated as being essentially
++         a multi-instruction push.  */
++      frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM;
++      if (!frame.emit_frame_chain)
++        frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM;
+     }
+
+   frame.laid_out = true;
+@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno)
+   return known_ge (cfun->machine->frame.reg_offset[regno], 0);
+ }
+
+-/* Return the next register up from REGNO up to LIMIT for the callee
+-   to save.  */
+-
+-static unsigned
+-aarch64_next_callee_save (unsigned regno, unsigned limit)
+-{
+-  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
+-    regno ++;
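The offset-ordered vectors are what make register-number order irrelevant to pairing. A sketch of the idea under assumed, simplified types: any two adjacent entries whose slots are exactly UNITS_PER_WORD (8 bytes) apart can form an STP/LDP pair, so x30 can pair with x19.

#include <cstdint>
#include <utility>
#include <vector>

struct save { unsigned regno; int64_t offset; };

// SAVES is in offset order, as built by aarch64_layout_frame above.
// Returns index pairs that can be emitted as a single STP/LDP.
std::vector<std::pair<int, int>>
form_pairs (const std::vector<save> &saves)
{
  std::vector<std::pair<int, int>> pairs;
  for (size_t i = 0; i + 1 < saves.size (); )
    if (saves[i + 1].offset == saves[i].offset + 8)  // UNITS_PER_WORD
      {
        pairs.push_back ({int (i), int (i + 1)});
        i += 2;       // both registers consumed by one pair
      }
    else
      ++i;            // lone save; emitted as a single STR/LDR
  return pairs;
}

Scanning by offset rather than register number is exactly what lets the next patch move the LR slot to the front without losing pairs.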
View file
_service:tar_scm:0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
Added
@@ -0,0 +1,107 @@
+From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:46:33 +0100
+Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in
+ more cases
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666
+
+Now that the prologue and epilogue code iterates over saved
+registers in offset order, we can put the LR save slot first
+without compromising LDP/STP formation.
+
+This isn't worthwhile when shadow call stacks are enabled, since the
+first two registers are also push/pop candidates, and LR cannot be
+popped when shadow call stacks are enabled.  (LR is instead loaded
+first and compared against the shadow stack's value.)
+
+But otherwise, it seems better to put the LR save slot first,
+to reduce unnecessary variation with the layout for stack clash
+protection.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make
+	the position of the LR save slot dependent on stack clash
+	protection unless shadow call stacks are enabled.
+
+gcc/testsuite/
+	* gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                    | 2 +-
+ gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_2.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_4.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_7.c  | 4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index e10c9d763..1c127192d 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void)
+       allocate_gpr_slot (R29_REGNUM);
+       allocate_gpr_slot (R30_REGNUM);
+     }
+-  else if (flag_stack_clash_protection
++  else if ((flag_stack_clash_protection || !frame.is_scs_enabled)
+	   && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
+     /* Put the LR save slot first, since it makes a good choice of probe
+        for stack clash purposes.  The idea is that the link register usually
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+index c19505082..c54ab2d0c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+@@ -14,6 +14,6 @@
+ t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
+ t_frame_run (test10)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+index 7e5df84cf..0d715314c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19")
+ t_frame_run (test2)
+
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+index ed13487a0..b41229c42 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test4, 400, "x19")
+ t_frame_run (test4)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+index 964527949..5702656a5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test7, 700, "x19")
+ t_frame_run (test7)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */
+
+--
+2.33.0
+
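As a rough, hypothetical illustration of the layout change (invented function, not taken from the patch or testsuite), a function that keeps a value live across a call now saves LR ahead of x19:

/* Sketch only; compile with -O2 on aarch64.  */
extern int bar (void);

int
foo (void)
{
  int x = bar ();	/* x must survive the second call, so it needs x19 */
  return x + bar ();
}

/* Expected prologue before the patch:  stp x19, x30, [sp, -NN]!
   Expected prologue after the patch:   stp x30, x19, [sp, -NN]!  */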
View file
_service:tar_scm:0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
Added
@@ -0,0 +1,3270 @@
+From 88a41bc24eb793eee27aa9f4ef6b763b3c3e76e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:25 +0000
+Subject: [PATCH 106/157] [Backport][SME] aarch64: Switch PSTATE.SM around
+ calls
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd8090f40079fa41ee58d9f76b2e50ed4f95c6bf
+
+This patch adds support for switching to the appropriate SME mode
+for each call.  Switching to streaming mode requires an SMSTART SM
+instruction and switching to non-streaming mode requires an SMSTOP SM
+instruction.  If the call is being made from streaming-compatible code,
+these switches are conditional on the current mode being the opposite
+of the one that the call needs.
+
+Since changing PSTATE.SM changes the vector length and effectively
+changes the ISA, the code to do the switching has to be emitted late.
+The patch does this using a new pass that runs next to late prologue/
+epilogue insertion.  (It doesn't use md_reorg because later additions
+need the CFG.)
+
+If a streaming-compatible function needs to switch mode for a call,
+it must restore the original mode afterwards.  The old mode must
+therefore be available immediately after the call.  The easiest
+way of ensuring this is to force the use of a hard frame pointer
+and ensure that the old state is saved at an in-range offset
+from there.
+
+Changing modes clobbers the Z and P registers, so we need to
+save and restore live Z and P state around each mode switch.
+However, mode switches are not expected to be performance
+critical, so it seemed better to err on the side of being
+correct rather than trying to optimise the save and restore
+with surrounding code.
+
+gcc/
+	* config/aarch64/aarch64-passes.def
+	(pass_late_thread_prologue_and_epilogue): New pass.
+	* config/aarch64/aarch64-sme.md: New file.
+	* config/aarch64/aarch64.md: Include it.
+	(*tb<optab><mode>1): Rename to...
+	(@aarch64_tb<optab><mode>): ...this.
+	(call, call_value, sibcall, sibcall_value): Don't require operand 2
+	to be a CONST_INT.
+	* config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Return
+	the insn.
+	(make_pass_switch_sm_state): Declare.
+	* config/aarch64/aarch64.h (TARGET_STREAMING_COMPATIBLE): New macro.
+	(CALL_USED_REGISTER): Mark VG as call-preserved.
+	(aarch64_frame::old_svcr_offset): New member variable.
+	(machine_function::call_switches_sm_state): Likewise.
+	(CUMULATIVE_ARGS::num_sme_mode_switch_args): Likewise.
+	(CUMULATIVE_ARGS::sme_mode_switch_args): Likewise.
+	* config/aarch64/aarch64.cc: Include tree-pass.h and cfgbuild.h.
+	(aarch64_cfun_incoming_pstate_sm): New function.
+	(aarch64_call_switches_pstate_sm): Likewise.
+	(aarch64_reg_save_mode): Return DImode for VG_REGNUM.
+	(aarch64_callee_isa_mode): New function.
+	(aarch64_insn_callee_isa_mode): Likewise.
+	(aarch64_guard_switch_pstate_sm): Likewise.
+	(aarch64_switch_pstate_sm): Likewise.
+	(aarch64_sme_mode_switch_regs): New class.
+	(aarch64_record_sme_mode_switch_args): New function.
+	(aarch64_finish_sme_mode_switch_args): Likewise.
+	(aarch64_function_arg): Handle the end marker by returning a
+	PARALLEL that contains the ABI cookie that we used previously
+	alongside the result of aarch64_finish_sme_mode_switch_args.
+	(aarch64_init_cumulative_args): Initialize num_sme_mode_switch_args.
+	(aarch64_function_arg_advance): If a call would switch SM state,
+	record all argument registers that would need to be saved around
+	the mode switch.
+	(aarch64_need_old_pstate_sm): New function.
+ (aarch64_layout_frame): Decide whether the frame needs to store the + incoming value of PSTATE.SM and allocate a save slot for it if so. + If a function switches SME state, arrange to save the old value + of the DWARF VG register. Handle the case where this is the only + register save slot above the FP. + (aarch64_save_callee_saves): Handles saves of the DWARF VG register. + (aarch64_get_separate_components): Prevent such saves from being + shrink-wrapped. + (aarch64_old_svcr_mem): New function. + (aarch64_read_old_svcr): Likewise. + (aarch64_guard_switch_pstate_sm): Likewise. + (aarch64_expand_prologue): Handle saves of the DWARF VG register. + Initialize any SVCR save slot. + (aarch64_expand_call): Allow the cookie to be PARALLEL that contains + both the UNSPEC_CALLEE_ABI value and a list of registers that need + to be preserved across a change to PSTATE.SM. If the call does + involve such a change to PSTATE.SM, record the registers that + would be clobbered by this process. Also emit an instruction + to mark the temporary change in VG. Update call_switches_pstate_sm. + (aarch64_emit_call_insn): Return the emitted instruction. + (aarch64_frame_pointer_required): New function. + (aarch64_conditional_register_usage): Prevent VG_REGNUM from being + treated as a register operand. + (aarch64_switch_pstate_sm_for_call): New function. + (pass_data_switch_pstate_sm): New pass variable. + (pass_switch_pstate_sm): New pass class. + (make_pass_switch_pstate_sm): New function. + (TARGET_FRAME_POINTER_REQUIRED): Define. + * config/aarch64/t-aarch64 (s-check-sve-md): Add aarch64-sme.md. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_1.c: New test. + * gcc.target/aarch64/sme/call_sm_switch_2.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_4.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_6.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_7.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_9.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_10.c: Likewise. 
+---
+ gcc/config/aarch64/aarch64-passes.def         |   1 +
+ gcc/config/aarch64/aarch64-protos.h           |   3 +-
+ gcc/config/aarch64/aarch64-sme.md             | 171 ++++
+ gcc/config/aarch64/aarch64.cc                 | 883 +++++++++++++++++-
+ gcc/config/aarch64/aarch64.h                  |  25 +-
+ gcc/config/aarch64/aarch64.md                 |  13 +-
+ gcc/config/aarch64/t-aarch64                  |   5 +-
+ .../gcc.target/aarch64/sme/call_sm_switch_1.c | 233 +++++
+ .../aarch64/sme/call_sm_switch_10.c           |  37 +
+ .../gcc.target/aarch64/sme/call_sm_switch_2.c |  43 +
+ .../gcc.target/aarch64/sme/call_sm_switch_3.c | 166 ++++
+ .../gcc.target/aarch64/sme/call_sm_switch_4.c |  43 +
+ .../gcc.target/aarch64/sme/call_sm_switch_5.c | 318 +++++++
+ .../gcc.target/aarch64/sme/call_sm_switch_6.c |  45 +
+ .../gcc.target/aarch64/sme/call_sm_switch_7.c | 516 ++++++++++
+ .../gcc.target/aarch64/sme/call_sm_switch_8.c |  87 ++
+ .../gcc.target/aarch64/sme/call_sm_switch_9.c | 103 ++
+ 17 files changed, 2668 insertions(+), 24 deletions(-)
+ create mode 100644 gcc/config/aarch64/aarch64-sme.md
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c
+
+diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
+index a2babc112..c6cbbf2ef 100644
+--- a/gcc/config/aarch64/aarch64-passes.def
++++ b/gcc/config/aarch64/aarch64-passes.def
+@@ -20,6 +20,7 @@
+
+ INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
+ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
++INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
+ INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
+ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
+ INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 9b03410dc..737f47026 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -913,7 +913,7 @@ void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
+				   const_tree, unsigned, bool = false);
+ void aarch64_init_expanders (void);
+ void aarch64_init_simd_builtins (void);
+-void aarch64_emit_call_insn (rtx);
++rtx_call_insn *aarch64_emit_call_insn (rtx);
+ void aarch64_register_pragmas (void);
+ void aarch64_relayout_simd_types (void);
+ void aarch64_reset_previous_fndecl (void);
+@@ -1055,6 +1055,7 @@ rtl_opt_pass *make_pass_track_speculation (gcc::context *);
+ rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *);
+ rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
+ rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
++rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
+
+ poly_uint64 aarch64_regmode_natural_size (machine_mode);
+
+diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
+new file mode 100644
+index 000000000..52427b4f1
+--- /dev/null
++++ b/gcc/config/aarch64/aarch64-sme.md
+@@ -0,0 +1,171 @@
++;; Machine description for AArch64 SME.
++;; Copyright (C) 2023 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but
++;; WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++;; General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
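For readers unfamiliar with the ACLE keywords involved, a minimal sketch of the situation the new pass handles (illustrative code, not part of the patch; it assumes a compiler with SME support, e.g. -march=armv9-a+sme):

void ns_callee (void);                            /* non-streaming */
void s_callee (void) __arm_streaming;             /* streaming */
void sc_callee (void) __arm_streaming_compatible; /* either mode */

void
caller (void) __arm_streaming_compatible
{
  sc_callee ();	/* no mode switch needed */
  s_callee ();	/* SMSTART SM first, conditional on PSTATE.SM == 0 */
  ns_callee ();	/* SMSTOP SM first, conditional on PSTATE.SM == 1 */
}		/* in both cases the original mode is restored afterwards */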
View file
_service:tar_scm:0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
Added
@@ -0,0 +1,4324 @@
+From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA
+ attributes
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f
+
+SME has an array called ZA that can be enabled and disabled separately
+from streaming mode.  A status bit called PSTATE.ZA indicates whether
+ZA is currently enabled or not.
+
+In C and C++, the state of PSTATE.ZA is controlled using function
+attributes.  There are four attributes that can be attached to
+function types to indicate that the function shares ZA with its
+caller.  These are:
+
+- arm::in("za")
+- arm::out("za")
+- arm::inout("za")
+- arm::preserves("za")
+
+If a function's type has one of these shared-ZA attributes,
+PSTATE.ZA is specified to be 1 on entry to the function and on return
+from the function.  Otherwise, the caller and callee have separate
+ZA contexts; they do not use ZA to share data.
+
+Although normal non-shared-ZA functions have a separate ZA context
+from their callers, nested uses of ZA are expected to be rare.
+The ABI therefore defines a cooperative lazy saving scheme that
+allows saves and restore of ZA to be kept to a minimum.
+(Callers still have the option of doing a full save and restore
+if they prefer.)
+
+Functions that want to use ZA internally have an arm::new("za")
+attribute, which tells the compiler to enable PSTATE.ZA for
+the duration of the function body.  It also tells the compiler
+to commit any lazy save initiated by a caller.
+
+The patch uses various abstract hard registers to track dataflow
+relating to ZA.  See the comments in the patch for details.
+
+The lazy save scheme is intended to be transparent to most normal
+functions, so that they don't need to be recompiled for SME.
+This is reflected in the way that most normal functions ignore
+the new hard registers added in the patch.
+
+As with arm::streaming and arm::streaming_compatible, the attributes are
+also available as __arm_<attr>.  This has two advantages: it triggers an
+error on compilers that don't understand the attributes, and it eases
+use on C, where [[...]] attributes were only added in C23.
+
+gcc/
+	* config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode.
+	* config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p)
+	(aarch64_output_rdsvl, aarch64_optimize_mode_switching)
+	(aarch64_restore_za): Declare.
+	* config/aarch64/constraints.md (UsR): New constraint.
+	* config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM)
+	(SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM)
+	(ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants.
+	(LAST_FAKE_REGNUM): Likewise.
+	(UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs.
+	(arches): Add sme.
+	(arch_enabled): Handle it.
+	(*cb<optab><mode>1): Rename to...
+	(aarch64_cb<optab><mode>1): ...this.
+	(*movsi_aarch64): Add an alternative for RDSVL.
+	(*movdi_aarch64): Likewise.
+	(aarch64_save_nzcv, aarch64_restore_nzcv): New insns.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA)
+	(UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE)
+	(UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2)
+	(UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs.
+	(UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise.
+	(UNSPECV_ASM_UPDATE_ZA): New unspecv.
+ (aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za) + (aarch64_initial_zero_za, aarch64_setup_local_tpidr2) + (aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2) + (aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za) + (aarch64_start_private_za_call, aarch64_end_private_za_call) + (aarch64_commit_lazy_save): New patterns. + * config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros. + (FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers. + (CALL_USED_REGISTERS): Replace with... + (CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers. + (FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers. + (FAKE_REGS): New register class. + (REG_CLASS_NAMES): Update accordingly. + (REG_CLASS_CONTENTS): Likewise. + (machine_function::tpidr2_block): New member variable. + (machine_function::tpidr2_block_ptr): Likewise. + (machine_function::za_save_buffer): Likewise. + (machine_function::next_asm_update_za_id): Likewise. + (CUMULATIVE_ARGS::shared_za_flags): Likewise. + (aarch64_mode_entity, aarch64_local_sme_state): New enums. + (aarch64_tristate_mode): Likewise. + (OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define. + * config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN) + (AARCH64_STATE_OUT): New constants. + (aarch64_attribute_shared_state_flags): New function. + (aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state) + (aarch64_check_state_string, cmp_string_csts): Likewise. + (aarch64_merge_string_arguments, aarch64_check_arm_new_against_type) + (handle_arm_new, handle_arm_shared): Likewise. + (handle_arm_new_za_attribute): New + (aarch64_arm_attribute_table): Add new, preserves, in, out, and inout. + (aarch64_hard_regno_nregs): Handle FAKE_REGS. + (aarch64_hard_regno_mode_ok): Likewise. + (aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions. + (aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za. + (aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions. + (aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za. + (aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags) + (aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions. + (aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise. + (aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise. + (aarch64_expand_mov_immediate): Handle RDSVL immediates. + (aarch64_function_arg): Add the ZA sharing flags as a third limb + of the PARALLEL. + (aarch64_init_cumulative_args): Record the ZA sharing flags. + (aarch64_extra_live_on_entry): New function. Handle the new + ZA-related fake registers. + (aarch64_epilogue_uses): Handle the new ZA-related fake registers. + (aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants. + (aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions. + (aarch64_init_tpidr2_block, aarch64_restore_za): Likewise. + (aarch64_layout_frame): Check whether the current function creates + new ZA state. Record that it clobbers LR if so. + (aarch64_expand_prologue): Handle functions that create new ZA state. + (aarch64_expand_epilogue): Likewise. + (aarch64_create_tpidr2_block): New function. + (aarch64_restore_za): Likewise. + (aarch64_start_call_args): Disallow calls to shared-ZA functions + from functions that have no ZA state. Emit a marker instruction + before calls to private-ZA functions from functions that have + SME state. + (aarch64_expand_call): Add return registers for state that is + managed via attributes. 
Record the use and clobber information + for the ZA registers. + (aarch64_end_call_args): New function. + (aarch64_regno_regclass): Handle FAKE_REGS. + (aarch64_class_max_nregs): Likewise. + (aarch64_override_options_internal): Require TARGET_SME for + functions that have ZA state. + (aarch64_conditional_register_usage): Handle FAKE_REGS. + (aarch64_mov_operand_p): Handle RDSVL immediates. + (aarch64_comp_type_attributes): Check that the ZA sharing flags + are equal. + (aarch64_merge_decl_attributes): New function. + (aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer) + (aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise. + (aarch64_insn_references_sme_state_p): Likewise. + (aarch64_mode_needed_local_sme_state): Likewise. + (aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise. + (aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise. + (aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise. + (aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise. + (aarch64_mode_backprop, aarch64_mode_entry): Likewise. + (aarch64_mode_exit, aarch64_mode_eh_handler): Likewise. + (aarch64_mode_priority, aarch64_md_asm_adjust): Likewise. + (TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define. + (TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise. + (TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise. + (TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise. + (TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise. + (TARGET_EXTRA_LIVE_ON_ENTRY): Likewise. + (TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_new, __arm_preserves,__arm_in, __arm_out, and __arm_inout. + +gcc/testsuite/ + * gcc.target/aarch64/sme/za_state_1.c: New test. + * gcc.target/aarch64/sme/za_state_2.c: Likewise. + * gcc.target/aarch64/sme/za_state_3.c: Likewise. + * gcc.target/aarch64/sme/za_state_4.c: Likewise. + * gcc.target/aarch64/sme/za_state_5.c: Likewise. + * gcc.target/aarch64/sme/za_state_6.c: Likewise. + * g++.target/aarch64/sme/exceptions_1.C: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. +--- + gcc/config/aarch64/aarch64-c.cc | 32 + + gcc/config/aarch64/aarch64-isa-modes.def | 5 + + gcc/config/aarch64/aarch64-protos.h | 5 + + gcc/config/aarch64/aarch64-sme.md | 287 ++++ + gcc/config/aarch64/aarch64.cc | 1371 ++++++++++++++++- + gcc/config/aarch64/aarch64.h | 98 +- + gcc/config/aarch64/aarch64.md | 81 +- + gcc/config/aarch64/constraints.md | 6 + + .../g++.target/aarch64/sme/exceptions_1.C | 189 +++ + .../g++.target/aarch64/sme/keyword_macros_1.C | 5 + + .../gcc.target/aarch64/sme/keyword_macros_1.c | 5 + + .../gcc.target/aarch64/sme/za_state_1.c | 154 ++ + .../gcc.target/aarch64/sme/za_state_2.c | 73 + + .../gcc.target/aarch64/sme/za_state_3.c | 31 + + .../gcc.target/aarch64/sme/za_state_4.c | 585 +++++++ + .../gcc.target/aarch64/sme/za_state_5.c | 595 +++++++ + .../gcc.target/aarch64/sme/za_state_6.c | 23 +
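A short sketch of how the four sharing attributes and arm::new("za") combine in practice (illustrative only, using the __arm_* keyword spellings mentioned in the commit message above):

void producer (void) __arm_streaming __arm_out("za");	/* writes ZA for caller */
void consumer (void) __arm_streaming __arm_in("za");	/* reads ZA on entry */

__arm_new("za") void
private_za_user (void) __arm_streaming
{
  /* PSTATE.ZA is enabled here, and any lazy save that a caller
     started is committed first.  */
  producer ();
  consumer ();
}	/* ZA is private to this function and dead on return */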
View file
_service:tar_scm:0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
Added
@@ -0,0 +1,103 @@
+From 9866b4c1d85d88fd9e25ff3ac5224b69d4e0f0b2 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 108/157] [Backport][SME] aarch64: Add a register class for
+ w12-w15
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=084122adb5792a9c8e7f7876e2c1d59ba80c228b
+
+Some SME instructions use w12-w15 to index ZA.  This patch
+adds a register class for that range.
+
+gcc/
+	* config/aarch64/aarch64.h (W12_W15_REGNUM_P): New macro.
+	(W12_W15_REGS): New register class.
+	(REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add entries for it.
+	* config/aarch64/aarch64.cc (aarch64_regno_regclass)
+	(aarch64_class_max_nregs, aarch64_register_move_cost): Handle
+	W12_W15_REGS.
+---
+ gcc/config/aarch64/aarch64.cc | 12 +++++++-----
+ gcc/config/aarch64/aarch64.h  |  6 ++++++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a6e996c5b..112dfeabb 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -14213,6 +14213,9 @@ aarch64_label_mentioned_p (rtx x)
+ enum reg_class
+ aarch64_regno_regclass (unsigned regno)
+ {
++  if (W12_W15_REGNUM_P (regno))
++    return W12_W15_REGS;
++
+   if (STUB_REGNUM_P (regno))
+     return STUB_REGS;
+
+@@ -14577,6 +14580,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
+   unsigned int nregs, vec_flags;
+   switch (regclass)
+     {
++    case W12_W15_REGS:
+     case STUB_REGS:
+     case TAILCALL_ADDR_REGS:
+     case POINTER_REGS:
+@@ -16926,13 +16930,11 @@ aarch64_register_move_cost (machine_mode mode,
+   const struct cpu_regmove_cost *regmove_cost
+     = aarch64_tune_params.regmove_cost;
+
+-  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
+-  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
+-      || to == STUB_REGS)
++  /* Treat any subset of POINTER_REGS as though it were GENERAL_REGS.  */
++  if (reg_class_subset_p (to, POINTER_REGS))
+     to = GENERAL_REGS;
+
+-  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
+-      || from == STUB_REGS)
++  if (reg_class_subset_p (from, POINTER_REGS))
+     from = GENERAL_REGS;
+
+   /* Make RDFFR very expensive.  In particular, if we know that the FFR
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 89d30b9bf..8b21faf34 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -648,6 +648,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+   && (REGNO) != R17_REGNUM \
+   && (REGNO) != R30_REGNUM) \
+
++#define W12_W15_REGNUM_P(REGNO) \
++  IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM)
++
+ #define FP_REGNUM_P(REGNO)			\
+   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
+
+@@ -674,6 +677,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ enum reg_class
+ {
+   NO_REGS,
++  W12_W15_REGS,
+   TAILCALL_ADDR_REGS,
+   STUB_REGS,
+   GENERAL_REGS,
+@@ -698,6 +702,7 @@ enum reg_class
+ #define REG_CLASS_NAMES				\
+ {						\
+   "NO_REGS",					\
++  "W12_W15_REGS",				\
+   "TAILCALL_ADDR_REGS",				\
+   "STUB_REGS",					\
+   "GENERAL_REGS",				\
+@@ -719,6 +724,7 @@ enum reg_class
+ #define REG_CLASS_CONTENTS						\
+ {									\
+   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
++  { 0x0000f000, 0x00000000, 0x00000000 },	/* W12_W15_REGS */	\
+   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\
+   { 0x3ffcffff, 0x00000000, 0x00000000 },	/* STUB_REGS */		\
+   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
+--
+2.33.0
+
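The new REG_CLASS_CONTENTS entry is just a bitmask over hard register numbers 12-15; a quick self-contained sanity check of the constant (illustrative C, not part of the patch):

#include <assert.h>

int
main (void)
{
  /* One bit per GPR: w12-w15 occupy bits 12-15 of the first word.  */
  unsigned int w12_w15 = (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);
  assert (w12_w15 == 0x0000f000);
  return 0;
}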
View file
_service:tar_scm:0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
Added
@@ -0,0 +1,72 @@
+From 8310c0df319a86bc2f63b8d3198dd1c394827bac Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 109/157] [Backport][SME] aarch64: Add a VNx1TI mode
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80fc055cf00fee4b1f9f19f77c8880b12226e086
+
+Although TI isn't really a native SVE element mode, it's convenient
+for SME if we define VNx1TI anyway, so that it can be used to
+distinguish .Q ZA operations from others.  It's purely an RTL
+convenience and isn't (yet) a valid storage mode.
+
+gcc/
+	* config/aarch64/aarch64-modes.def: Add VNx1TI.
+---
+ gcc/config/aarch64/aarch64-modes.def | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
+index 8f399225a..8fa66fdb3 100644
+--- a/gcc/config/aarch64/aarch64-modes.def
++++ b/gcc/config/aarch64/aarch64-modes.def
+@@ -146,7 +146,7 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2)
+    for 8-bit, 16-bit, 32-bit and 64-bit elements respectively.  It isn't
+    strictly necessary to set the alignment here, since the default would
+    be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.  */
+-#define SVE_MODES(NVECS, VB, VH, VS, VD) \
++#define SVE_MODES(NVECS, VB, VH, VS, VD, VT) \
+   VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+   VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+ \
+   ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \
+   ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \
++  ADJUST_NUNITS (VT##TI, exact_div (aarch64_sve_vg * NVECS, 2)); \
+   ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \
+@@ -163,17 +164,23 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2)
+   ADJUST_ALIGNMENT (VH##HI, 16); \
+   ADJUST_ALIGNMENT (VS##SI, 16); \
+   ADJUST_ALIGNMENT (VD##DI, 16); \
++  ADJUST_ALIGNMENT (VT##TI, 16); \
+   ADJUST_ALIGNMENT (VH##BF, 16); \
+   ADJUST_ALIGNMENT (VH##HF, 16); \
+   ADJUST_ALIGNMENT (VS##SF, 16); \
+   ADJUST_ALIGNMENT (VD##DF, 16);
+
+-/* Give SVE vectors the names normally used for 256-bit vectors.
+-   The actual number depends on command-line flags.  */
+-SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+-SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+-SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+-SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
++/* Give SVE vectors names of the form VNxX, where X describes what is
++   stored in each 128-bit unit.  The actual size of the mode depends
++   on command-line flags.
++
++   VNx1TI isn't really a native SVE mode, but it can be useful in some
++   limited situations.  */
++VECTOR_MODE_WITH_PREFIX (VNx, INT, TI, 1, 1);
++SVE_MODES (1, VNx16, VNx8, VNx4, VNx2, VNx1)
++SVE_MODES (2, VNx32, VNx16, VNx8, VNx4, VNx2)
++SVE_MODES (3, VNx48, VNx24, VNx12, VNx6, VNx3)
++SVE_MODES (4, VNx64, VNx32, VNx16, VNx8, VNx4)
+
+ /* Partial SVE vectors:
+
+--
+2.33.0
+
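To see what the exact_div in ADJUST_NUNITS means in concrete terms, here is a small worked example (a sketch, assuming aarch64_sve_vg counts the 64-bit granules in one SVE vector, so vg = VL/64):

#include <stdio.h>

int
main (void)
{
  for (int vl_bits = 128; vl_bits <= 2048; vl_bits *= 2)
    {
      int vg = vl_bits / 64;	/* aarch64_sve_vg for this vector length */
      printf ("VL = %4d bits: VNx1TI holds exact_div (%d, 2) = %d TI unit(s)\n",
	      vl_bits, vg, vg / 2);
    }
  return 0;
}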
View file
_service:tar_scm:0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
Added
@@ -0,0 +1,118 @@
+From e3c0d3d98ab1f60900533f3f75c598f899f37c9f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 110/157] [Backport][SME] aarch64: Generalise
+ unspec_based_function_base
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ec23d5a29bc5d89cef60e2aba2fe4095ee12a8f
+
+Until now, SVE intrinsics that map directly to unspecs
+have always used type suffix 0 to distinguish between signed
+integers, unsigned integers, and floating-point values.
+SME adds functions that need to use type suffix 1 instead.
+This patch generalises the classes accordingly.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins-functions.h
+	(unspec_based_function_base): Allow type suffix 1 to determine
+	the mode of the operation.
+	(unspec_based_function): Update accordingly.
+	(unspec_based_fused_function): Likewise.
+	(unspec_based_fused_lane_function): Likewise.
+---
+ .../aarch64/aarch64-sve-builtins-functions.h  | 29 ++++++++++++-------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+index 94a6d1207..f5fa4030c 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+@@ -250,18 +250,21 @@ class unspec_based_function_base : public function_base
+ public:
+   CONSTEXPR unspec_based_function_base (int unspec_for_sint,
+					 int unspec_for_uint,
+-					 int unspec_for_fp)
++					 int unspec_for_fp,
++					 unsigned int suffix_index = 0)
+     : m_unspec_for_sint (unspec_for_sint),
+       m_unspec_for_uint (unspec_for_uint),
+-      m_unspec_for_fp (unspec_for_fp)
++      m_unspec_for_fp (unspec_for_fp),
++      m_suffix_index (suffix_index)
+   {}
+
+   /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
+   int
+   unspec_for (const function_instance &instance) const
+   {
+-    return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
+-	    : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
++    auto &suffix = instance.type_suffix (m_suffix_index);
++    return (!suffix.integer_p ? m_unspec_for_fp
++	    : suffix.unsigned_p ? m_unspec_for_uint
+	    : m_unspec_for_sint);
+   }
+
+@@ -270,6 +273,9 @@ public:
+   int m_unspec_for_sint;
+   int m_unspec_for_uint;
+   int m_unspec_for_fp;
++
++  /* Which type suffix is used to choose between the unspecs.  */
++  unsigned int m_suffix_index;
+ };
+
+ /* A function_base for functions that have an associated unspec code.
+@@ -336,7 +342,8 @@ public:
+   rtx
+   expand (function_expander &e) const OVERRIDE
+   {
+-    return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
++    return e.use_exact_insn (CODE (unspec_for (e),
++				   e.vector_mode (m_suffix_index)));
+   }
+ };
+
+@@ -395,16 +402,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+	/* Put the operands in the normal (fma ...) order, with the accumulator
+	   last.  This fits naturally since that's also the unprinted operand
+	   in the asm output.  */
+	e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
+-	icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_sve (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+@@ -430,16 +437,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+	/* Put the operands in the normal (fma ...) order, with the accumulator
+	   last.  This fits naturally since that's also the unprinted operand
+	   in the asm output.  */
+	e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
+-	icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_lane (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+--
+2.33.0
+
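A concrete case where the signed/unsigned/float choice hangs off type suffix 1 rather than suffix 0 is the SME outer-product intrinsics added later in this series; an illustrative (not authoritative) example:

#include <arm_sme.h>

/* _za32 is type suffix 0; _s8 / _u8 / _f32 is type suffix 1 and is
   what distinguishes the SMOPA, UMOPA and FMOPA unspecs.  */
void
mopa (svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm)
  __arm_streaming __arm_inout("za")
{
  svmopa_za32_s8_m (0, pn, pm, zn, zm);	/* signed -> SMOPA */
}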
View file
_service:tar_scm:0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
Added
@@ -0,0 +1,117 @@
+From 3d721b42c97baba562b77988cec0fec229217519 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 111/157] [Backport][SME] aarch64: Generalise _m rules for SVE
+ intrinsics
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8de9304d94d4ec42863a25c1cb1a1ba9a1e3e0fe
+
+In SVE there was a simple rule that unary merging (_m) intrinsics
+had a separate initial argument to specify the values of inactive
+lanes, whereas other merging functions took inactive lanes from
+the first operand to the operation.
+
+That rule began to break down in SVE2, and it continues to do
+so in SME.  This patch therefore adds a virtual function to
+specify whether the separate initial argument is present or not.
+The old rule is still the default.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h
+	(function_shape::has_merge_argument_p): New member function.
+	* config/aarch64/aarch64-sve-builtins.cc:
+	(function_resolver::check_gp_argument): Use it.
+	(function_expander::get_fallback_value): Likewise.
+	* config/aarch64/aarch64-sve-builtins-shapes.cc
+	(apply_predication): Likewise.
+	(unary_convert_narrowt_def::has_merge_argument_p): New function.
+---
+ gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 10 ++++++++--
+ gcc/config/aarch64/aarch64-sve-builtins.cc        |  4 ++--
+ gcc/config/aarch64/aarch64-sve-builtins.h         | 13 +++++++++++++
+ 3 files changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index 95e40d8f3..c536949ba 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -66,8 +66,8 @@ apply_predication (const function_instance &instance, tree return_type,
+	 the same type as the result.  For unary_convert_narrowt it also
+	 provides the "bottom" half of active elements, and is present
+	 for all types of predication.  */
+-      if ((argument_types.length () == 2 && instance.pred == PRED_m)
+-	  || instance.shape == shapes::unary_convert_narrowt)
++      auto nargs = argument_types.length () - 1;
++      if (instance.shape->has_merge_argument_p (instance, nargs))
+	argument_types.quick_insert (0, return_type);
+     }
+ }
+@@ -3271,6 +3271,12 @@ SHAPE (unary_convert)
+    predicate.  */
+ struct unary_convert_narrowt_def : public overloaded_base<1>
+ {
++  bool
++  has_merge_argument_p (const function_instance &, unsigned int) const override
++  {
++    return true;
++  }
++
+   void
+   build (function_builder &b, const function_group_info &group) const OVERRIDE
+   {
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 5f3a2baea..3441b4294 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -2287,7 +2287,7 @@ function_resolver::check_gp_argument (unsigned int nops,
+   if (pred != PRED_none)
+     {
+       /* Unary merge operations should use resolve_unary instead.  */
+-      gcc_assert (nops != 1 || pred != PRED_m);
++      gcc_assert (!shape->has_merge_argument_p (*this, nops));
+       nargs = nops + 1;
+       if (!check_num_arguments (nargs)
+	  || !require_vector_type (i, VECTOR_TYPE_svbool_t))
+@@ -2931,7 +2931,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
+
+   gcc_assert (pred == PRED_m || pred == PRED_x);
+   if (merge_argno == DEFAULT_MERGE_ARGNO)
+-    merge_argno = nops == 1 && pred == PRED_m ? 0 : 1;
++    merge_argno = shape->has_merge_argument_p (*this, nops) ? 0 : 1;
+
+   if (merge_argno == 0)
+     return args[argno++];
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 7132b6e77..f16ac3947 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -710,6 +710,9 @@ public:
+ class function_shape
+ {
+ public:
++  virtual bool has_merge_argument_p (const function_instance &,
++				     unsigned int) const;
++
+   virtual bool explicit_type_suffix_p (unsigned int) const = 0;
+
+   /* True if the group suffix is present in overloaded names.
+@@ -982,6 +985,16 @@ function_base::vectors_per_tuple (const function_instance &instance) const
+   return instance.group_suffix ().vectors_per_tuple;
+ }
+
++/* Return true if INSTANCE (which has NARGS arguments) has an initial
++   vector argument whose only purpose is to specify the values of
++   inactive lanes.  */
++inline bool
++function_shape::has_merge_argument_p (const function_instance &instance,
++				      unsigned int nargs) const
++{
++  return nargs == 1 && instance.pred == PRED_m;
++}
++
+ /* Return the mode of the result of a call.  */
+ inline machine_mode
+ function_expander::result_mode () const
+--
+2.33.0
+
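In intrinsic terms, the old rule that the new hook encodes as its default looks like this (illustrative SVE code, independent of the patch):

#include <arm_sve.h>

svint32_t
f (svbool_t pg, svint32_t inactive, svint32_t x, svint32_t y)
{
  /* Unary _m: an explicit first argument supplies the inactive lanes.  */
  svint32_t a = svabs_s32_m (inactive, pg, x);
  /* Binary _m: inactive lanes are taken from the first operand (x).  */
  svint32_t b = svadd_s32_m (pg, x, y);
  return svadd_s32_m (pg, a, b);
}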
View file
_service:tar_scm:0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
Added
@@ -0,0 +1,15955 @@
+From 6c651a11f8e68244c4c53ad7b29983f54a3bc737 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 112/157] [Backport][SME] aarch64: Add support for <arm_sme.h>
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4f6ab9537051e156d52bd8e9df40107ba6685895
+
+This adds support for the SME parts of arm_sme.h.
+
+gcc/
+	* doc/invoke.texi: Document +sme-i16i64 and +sme-f64f64.
+	* config.gcc (aarch64*-*-*): Add arm_sme.h to the list of headers
+	to install and aarch64-sve-builtins-sme.o to the list of objects
+	to build.
+	* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
+	or undefine TARGET_SME, TARGET_SME_I16I64 and TARGET_SME_F64F64.
+	(aarch64_pragma_aarch64): Handle arm_sme.h.
+	* config/aarch64/aarch64-option-extensions.def (sme-i16i64)
+	(sme-f64f64): New extensions.
+	* config/aarch64/aarch64-protos.h (aarch64_sme_vq_immediate)
+	(aarch64_addsvl_addspl_immediate_p, aarch64_output_addsvl_addspl)
+	(aarch64_output_sme_zero_za): Declare.
+	(aarch64_output_move_struct): Delete.
+	(aarch64_sme_ldr_vnum_offset): Declare.
+	(aarch64_sve::handle_arm_sme_h): Likewise.
+	* config/aarch64/aarch64.h (AARCH64_ISA_SM_ON): New macro.
+	(AARCH64_ISA_SME_I16I64, AARCH64_ISA_SME_F64F64): Likewise.
+	(TARGET_STREAMING, TARGET_STREAMING_SME): Likewise.
+	(TARGET_SME_I16I64, TARGET_SME_F64F64): Likewise.
+	* config/aarch64/aarch64.cc (aarch64_sve_rdvl_factor_p): Rename to...
+	(aarch64_sve_rdvl_addvl_factor_p): ...this.
+	(aarch64_sve_rdvl_immediate_p): Update accordingly.
+	(aarch64_rdsvl_immediate_p, aarch64_add_offset): Likewise.
+	(aarch64_sme_vq_immediate): Likewise.  Make public.
+	(aarch64_sve_addpl_factor_p): New function.
+	(aarch64_sve_addvl_addpl_immediate_p): Use
+	aarch64_sve_rdvl_addvl_factor_p and aarch64_sve_addpl_factor_p.
+	(aarch64_addsvl_addspl_immediate_p): New function.
+	(aarch64_output_addsvl_addspl): Likewise.
+	(aarch64_cannot_force_const_mem): Return true for RDSVL immediates.
+	(aarch64_classify_index): Handle .Q scaling for VNx1TImode.
+	(aarch64_classify_address): Likewise for vnum offsets.
+	(aarch64_output_sme_zero_za): New function.
+	(aarch64_sme_ldr_vnum_offset_p): Likewise.
+	* config/aarch64/predicates.md (aarch64_addsvl_addspl_immediate):
+	New predicate.
+	(aarch64_pluslong_operand): Include it for SME.
+	* config/aarch64/constraints.md (Ucj, Uav): New constraints.
+	* config/aarch64/iterators.md (VNx1TI_ONLY): New mode iterator.
+	(SME_ZA_I, SME_ZA_SDI, SME_ZA_SDF_I, SME_MOP_BHI): Likewise.
+	(SME_MOP_HSDF): Likewise.
+	(UNSPEC_SME_ADDHA, UNSPEC_SME_ADDVA, UNSPEC_SME_FMOPA)
+	(UNSPEC_SME_FMOPS, UNSPEC_SME_LD1_HOR, UNSPEC_SME_LD1_VER)
+	(UNSPEC_SME_READ_HOR, UNSPEC_SME_READ_VER, UNSPEC_SME_SMOPA)
+	(UNSPEC_SME_SMOPS, UNSPEC_SME_ST1_HOR, UNSPEC_SME_ST1_VER)
+	(UNSPEC_SME_SUMOPA, UNSPEC_SME_SUMOPS, UNSPEC_SME_UMOPA)
+	(UNSPEC_SME_UMOPS, UNSPEC_SME_USMOPA, UNSPEC_SME_USMOPS)
+	(UNSPEC_SME_WRITE_HOR, UNSPEC_SME_WRITE_VER): New unspecs.
+	(elem_bits): Handle x2 and x4 structure modes, plus VNx1TI.
+	(Vetype, Vesize, VPRED): Handle VNx1TI.
+	(b): New mode attribute.
+	(SME_LD1, SME_READ, SME_ST1, SME_WRITE, SME_BINARY_SDI, SME_INT_MOP)
+	(SME_FP_MOP): New int iterators.
+	(optab): Handle SME unspecs.
+	(hv): New int attribute.
+	* config/aarch64/aarch64.md (*add<mode>3_aarch64): Handle ADDSVL
+	and ADDSPL.
+	* config/aarch64/aarch64-sme.md (UNSPEC_SME_LDR): New unspec.
+ (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_ldr0, @aarch64_sme_ldrn<mode>): New patterns. + (UNSPEC_SME_STR): New unspec. + (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_str0, @aarch64_sme_strn<mode>): New patterns. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (UNSPEC_SME_ZERO): New unspec. + (aarch64_sme_zero): New pattern. + (@aarch64_sme_<SME_BINARY_SDI:optab><mode>): Likewise. + (@aarch64_sme_<SME_INT_MOP:optab><mode>): Likewise. + (@aarch64_sme_<SME_FP_MOP:optab><mode>): Likewise. + * config/aarch64/aarch64-sve-builtins.def: Add ZA type suffixes. + Include aarch64-sve-builtins-sme.def. + (DEF_SME_ZA_FUNCTION): New macro. + * config/aarch64/aarch64-sve-builtins.h (CP_READ_ZA): New call + property. + (CP_WRITE_ZA): Likewise. + (PRED_za_m): New predication type. + (type_suffix_index): Handle DEF_SME_ZA_SUFFIX. + (type_suffix_info): Add vector_p and za_p fields. + (function_instance::num_za_tiles): New member function. + (function_builder::get_attributes): Add an aarch64_feature_flags + argument. + (function_expander::get_contiguous_base): Take a base argument + number, a vnum argument number, and an argument that indicates + whether the vnum parameter is a factor of the SME vector length + or the prevailing vector length. + (function_expander::add_integer_operand): Take a poly_int64. + (sve_switcher::sve_switcher): Take a base set of flags. + (sme_switcher): New class. + (scalar_types): Add a null entry for NUM_VECTOR_TYPES. + * config/aarch64/aarch64-sve-builtins.cc: Include + aarch64-sve-builtins-sme.h. + (pred_suffixes): Add an entry for PRED_za_m. + (type_suffixes): Initialize vector_p and za_p. Handle ZA suffixes. + (TYPES_all_za, TYPES_d_za, TYPES_za_bhsd_data, TYPES_za_all_data) + (TYPES_za_s_integer, TYPES_za_d_integer, TYPES_mop_base) + (TYPES_mop_base_signed, TYPES_mop_base_unsigned, TYPES_mop_i16i64) + (TYPES_mop_i16i64_signed, TYPES_mop_i16i64_unsigned, TYPES_za): New + type suffix macros. + (preds_m, preds_za_m): New predication lists. + (function_groups): Handle DEF_SME_ZA_FUNCTION. + (scalar_types): Add an entry for NUM_VECTOR_TYPES. + (find_type_suffix_for_scalar_type): Check positively for vectors + rather than negatively for predicates. + (check_required_extensions): Handle PSTATE.SM and PSTATE.ZA + requirements. + (report_out_of_range): Handle the case where the minimum and + maximum are the same. + (function_instance::reads_global_state_p): Return true for functions + that read ZA. + (function_instance::modifies_global_state_p): Return true for functions + that write to ZA. + (sve_switcher::sve_switcher): Add a base flags argument. + (function_builder::get_name): Handle "__arm_" prefixes. + (add_attribute): Add an overload that takes a namespaces. + (add_shared_state_attribute): New function. + (function_builder::get_attributes): Take the required feature flags + as argument. Add streaming and ZA attributes where appropriate. + (function_builder::add_unique_function): Update calls accordingly. + (function_resolver::check_gp_argument): Assert that the predication + isn't ZA _m predication. 
+ (function_checker::function_checker): Don't bias the argument + number for ZA _m predication. + (function_expander::get_contiguous_base): Add arguments that + specify the base argument number, the vnum argument number, + and an argument that indicates whether the vnum parameter is + a factor of the SME vector length or the prevailing vector length. + Handle the SME case. + (function_expander::add_input_operand): Handle pmode_register_operand. + (function_expander::add_integer_operand): Take a poly_int64. + (init_builtins): Call handle_arm_sme_h for LTO. + (handle_arm_sve_h): Skip SME intrinsics. + (handle_arm_sme_h): New function. + * config/aarch64/aarch64-sve-builtins-functions.h + (read_write_za, write_za): New classes. + (unspec_based_sme_function, za_arith_function): New using aliases. + (quiet_za_arith_function): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.h + (binary_za_int_m, binary_za_m, binary_za_uint_m, bool_inherent) + (inherent_za, inherent_mask_za, ldr_za, load_za, read_za_m, store_za) + (str_za, unary_za_m, write_za_m): Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): + Expect za_m functions to have an existing governing predicate. + (binary_za_m_base, binary_za_int_m_def, binary_za_m_def): New classes. + (binary_za_uint_m_def, bool_inherent_def, inherent_za_def): Likewise. + (inherent_mask_za_def, ldr_za_def, load_za_def, read_za_m_def) + (store_za_def, str_za_def, unary_za_m_def, write_za_m_def): Likewise. + * config/aarch64/arm_sme.h: New file. + * config/aarch64/aarch64-sve-builtins-sme.h: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.cc: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.def: Likewise. + * config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on + aarch64-sve-builtins-sme.def and aarch64-sve-builtins-sme.h. + (aarch64-sve-builtins-sme.o): New rule. + +gcc/testsuite/ + * lib/target-supports.exp: Add sme and sme-i16i64 features. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME* + macros. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Allow functions + to be marked as __arm_streaming, __arm_streaming_compatible, and + __arm_inout("za"). + * g++.target/aarch64/sve/acle/general-c++/func_redef_4.c: Mark the + function as __arm_streaming_compatible. + * g++.target/aarch64/sve/acle/general-c++/func_redef_5.c: Likewise. + * g++.target/aarch64/sve/acle/general-c++/func_redef_7.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_4.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_5.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme-acle-asm.exp: New test harness. + * gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c: New test. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c: Likewise. +--- + gcc/config.gcc | 4 +- + gcc/config/aarch64/aarch64-c.cc | 6 + + .../aarch64/aarch64-option-extensions.def | 4 + + gcc/config/aarch64/aarch64-protos.h | 8 +- + gcc/config/aarch64/aarch64-sme.md | 373 +++++++++++++++ + .../aarch64/aarch64-sve-builtins-functions.h | 64 +++ + .../aarch64/aarch64-sve-builtins-shapes.cc | 306 +++++++++++-
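A small sketch of the user-level surface this patch enables (illustrative only; it assumes a compiler configured with SME support, e.g. -march=armv9-a+sme):

#include <arm_sme.h>

__arm_new("za") void
accumulate (svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming
{
  svzero_za ();				/* ZERO { za } */
  svaddha_za32_s32_m (0, pn, pm, zn);	/* ADDHA into tile 0 */
}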
View file
_service:tar_scm:0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
Added
@@ -0,0 +1,1748 @@
+From 0ad41f11bea5c303ff39c54cae8e46afdfae6070 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:29 +0000
+Subject: [PATCH 113/157] [Backport][SME] aarch64: Add support for
+ __arm_locally_streaming
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3f6e5991fab507aa79121dc44d1afcd622c78744
+
+This patch adds support for the __arm_locally_streaming attribute,
+which allows a function to use SME internally without changing
+the function's ABI.  The attribute is valid but redundant for
+__arm_streaming functions.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_arm_attribute_table): Add
+	arm::locally_streaming.
+	(aarch64_fndecl_is_locally_streaming): New function.
+	(aarch64_fndecl_sm_state): Handle locally-streaming functions.
+	(aarch64_cfun_enables_pstate_sm): New function.
+	(aarch64_add_offset): Add an argument that specifies whether
+	the streaming vector length should be used instead of the
+	prevailing one.
+	(aarch64_split_add_offset, aarch64_add_sp, aarch64_sub_sp): Likewise.
+	(aarch64_allocate_and_probe_stack_space): Likewise.
+	(aarch64_expand_mov_immediate): Update calls accordingly.
+	(aarch64_need_old_pstate_sm): Return true for locally-streaming
+	streaming-compatible functions.
+	(aarch64_layout_frame): Force all call-preserved Z and P registers
+	to be saved and restored if the function switches PSTATE.SM in the
+	prologue.
+	(aarch64_get_separate_components): Disable shrink-wrapping of
+	such Z and P saves and restores.
+	(aarch64_use_late_prologue_epilogue): New function.
+	(aarch64_expand_prologue): Measure SVE lengths in the streaming
+	vector length for locally-streaming functions, then emit code
+	to enable streaming mode.
+	(aarch64_expand_epilogue): Likewise in reverse.
+	(TARGET_USE_LATE_PROLOGUE_EPILOGUE): Define.
+	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
+	Define __arm_locally_streaming.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/locally_streaming_1.c: New test.
+	* gcc.target/aarch64/sme/locally_streaming_2.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_3.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_4.c: Likewise.
+	* gcc.target/aarch64/sme/keyword_macros_1.c: Add
+	__arm_locally_streaming.
+	* g++.target/aarch64/sme/keyword_macros_1.C: Likewise.
+---
+ gcc/config/aarch64/aarch64-c.cc               |   1 +
+ gcc/config/aarch64/aarch64.cc                 | 233 +++++++--
+ .../g++.target/aarch64/sme/keyword_macros_1.C |   1 +
+ .../gcc.target/aarch64/sme/keyword_macros_1.c |   1 +
+ .../aarch64/sme/locally_streaming_1.c         | 466 ++++++++++++++++++
+ .../aarch64/sme/locally_streaming_2.c         | 177 +++++++
+ .../aarch64/sme/locally_streaming_3.c         | 273 ++++++++++
+ .../aarch64/sme/locally_streaming_4.c         | 145 ++++++
+ 8 files changed, 1259 insertions(+), 38 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index cb8a6c2fc..745719d8b 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -86,6 +86,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
+
+   DEFINE_ARM_KEYWORD_MACRO ("streaming");
+   DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
++  DEFINE_ARM_KEYWORD_MACRO ("locally_streaming");
+
+ #undef DEFINE_ARM_KEYWORD_MACRO
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 113784e31..4cb43c2e2 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -3283,6 +3283,7 @@ static const attribute_spec aarch64_arm_attributes[] =
+     NULL, attr_streaming_exclusions },
+   { "streaming_compatible", 0, 0, false, true, true, true,
+     NULL, attr_streaming_exclusions },
++  { "locally_streaming", 0, 0, true, false, false, false, NULL, NULL },
+   { "new", 1, -1, true, false, false, false,
+     handle_arm_new, NULL },
+   { "preserves", 1, -1, false, true, true, true,
+@@ -4657,6 +4658,16 @@ aarch64_fntype_isa_mode (const_tree fntype)
+	  | aarch64_fntype_pstate_za (fntype));
+ }
+
++/* Return true if FNDECL uses streaming mode internally, as an
++   implementation choice.  */
++
++static bool
++aarch64_fndecl_is_locally_streaming (const_tree fndecl)
++{
++  return lookup_attribute ("arm", "locally_streaming",
++			   DECL_ATTRIBUTES (fndecl));
++}
++
+ /* Return the state of PSTATE.SM when compiling the body of
+    function FNDECL.  This might be different from the state of
+    PSTATE.SM on entry.  */
+@@ -4664,6 +4675,9 @@ aarch64_fntype_isa_mode (const_tree fntype)
+ static aarch64_feature_flags
+ aarch64_fndecl_pstate_sm (const_tree fndecl)
+ {
++  if (aarch64_fndecl_is_locally_streaming (fndecl))
++    return AARCH64_FL_SM_ON;
++
+   return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
+ }
+
+@@ -4739,6 +4753,16 @@ aarch64_cfun_has_new_state (const char *state_name)
+   return aarch64_fndecl_has_new_state (cfun->decl, state_name);
+ }
+
++/* Return true if PSTATE.SM is 1 in the body of the current function,
++   but is not guaranteed to be 1 on entry.  */
++
++static bool
++aarch64_cfun_enables_pstate_sm ()
++{
++  return (aarch64_fndecl_is_locally_streaming (cfun->decl)
++	  && aarch64_cfun_incoming_pstate_sm () != AARCH64_FL_SM_ON);
++}
++
+ /* Return true if the current function has state STATE_NAME, either by
+    creating new state itself or by sharing state with callers.  */
+
+@@ -6931,6 +6955,10 @@ aarch64_add_offset_temporaries (rtx x)
+   TEMP2, if nonnull, is a second temporary register that doesn't
+   overlap either DEST or REG.
+
++   FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of OFFSET
++   is measured relative to the SME vector length instead of the current
++   prevailing vector length.  It is 0 otherwise.
++
+   Since this function may be used to adjust the stack pointer, we must
+   ensure that it cannot cause transient stack deallocation (for example
+   by first incrementing SP and then decrementing when adjusting by a
+@@ -6939,6 +6967,7 @@ aarch64_add_offset_temporaries (rtx x)
+ static void
+ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+		    poly_int64 offset, rtx temp1, rtx temp2,
++		    aarch64_feature_flags force_isa_mode,
+		    bool frame_related_p, bool emit_move_imm = true)
+ {
+   gcc_assert (emit_move_imm || temp1 != NULL_RTX);
+@@ -6951,9 +6980,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   /* Try using ADDVL or ADDPL to add the whole value.  */
+   if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
+     {
+-      rtx offset_rtx = gen_int_mode (offset, mode);
++      gcc_assert (offset.coeffs[0] == offset.coeffs[1]);
++      rtx offset_rtx;
++      if (force_isa_mode == 0)
++	offset_rtx = gen_int_mode (offset, mode);
++      else
++	offset_rtx = aarch64_sme_vq_immediate (mode, offset.coeffs[0], 0);
+       rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
+       RTX_FRAME_RELATED_P (insn) = frame_related_p;
++      if (frame_related_p && (force_isa_mode & AARCH64_FL_SM_ON))
++	add_reg_note (insn, REG_CFA_ADJUST_CFA,
++		      gen_rtx_SET (dest, plus_constant (Pmode, src,
++							offset)));
+       return;
+     }
+
+@@ -6969,11 +7007,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   if (src != const0_rtx
+       && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
+     {
+-      rtx offset_rtx = gen_int_mode (poly_offset, mode);
++      rtx offset_rtx;
++      if (force_isa_mode == 0)
++	offset_rtx = gen_int_mode (poly_offset, mode);
++      else
++	offset_rtx = aarch64_sme_vq_immediate (mode, factor, 0);
+       if (frame_related_p)
+	{
+	  rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
+	  RTX_FRAME_RELATED_P (insn) = true;
++	  if (force_isa_mode & AARCH64_FL_SM_ON)
++	    add_reg_note (insn, REG_CFA_ADJUST_CFA,
++			  gen_rtx_SET (dest, plus_constant (Pmode, src,
++							    poly_offset)));
+	  src = dest;
+	}
+       else
+@@ -7004,9 +7050,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+   rtx val;
+   if (IN_RANGE (rel_factor, -32, 31))
+     {
++      if (force_isa_mode & AARCH64_FL_SM_ON)
++	{
++	  /* Try to use an unshifted RDSVL, otherwise fall back on
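For context, a minimal sketch of what the attribute means for users (illustrative, not from the patch):

/* Callers see a normal AAPCS64 function; only the body runs in
   streaming mode.  The prologue executes SMSTART SM and the epilogue
   SMSTOP SM, which is why the patch forces the call-preserved Z and P
   registers to be saved and restored around the switch.  */
__arm_locally_streaming void
fill (float *dst, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = 0.0f;
}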
_service:tar_scm:0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
Added
@@ -0,0 +1,708 @@ +From ef9c800309fa326ca56dd9d9affd7d5498624bb8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:29 +0000 +Subject: PATCH 114/157 BackportSME aarch64: Handle PSTATE.SM across + abnormal edges + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=275706fc59b5fdcc26d46d9f19951fc86b40c515 + +PSTATE.SM is always off on entry to an exception handler, and on entry +to a nonlocal goto receiver. Those entry points need to switch +PSTATE.SM back to the appropriate state for the current function. +In the case of streaming-compatible functions, they need to restore +the mode that the caller was originally using. + +The requirement on nonlocal goto receivers means that nonlocal +jumps need to ensure that PSTATE.SM is zero. + +gcc/ + * config/aarch64/aarch64.cc: Include except.h + (aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function. + (aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise. + (aarch64_need_old_pstate_sm): Return true if the function has + a nonlocal-goto or exception receiver. + (aarch64_switch_pstate_sm_for_landing_pad): New function. + (aarch64_switch_pstate_sm_for_jump): Likewise. + (pass_switch_pstate_sm::gate): Enable the pass for all + streaming and streaming-compatible functions. + (pass_switch_pstate_sm::execute): Handle non-local gotos and their + receivers. Handle exception handler entry points. + +gcc/testsuite/ + * g++.target/aarch64/sme/exceptions_2.C: New test. + * gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 141 ++++++++++++++++- + .../g++.target/aarch64/sme/exceptions_2.C | 148 ++++++++++++++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_1.c | 58 +++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_2.c | 44 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_3.c | 46 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_4.c | 25 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_5.c | 26 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_6.c | 31 ++++ + .../gcc.target/aarch64/sme/nonlocal_goto_7.c | 25 +++ + 9 files changed, 537 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4cb43c2e2..effb567c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -82,6 +82,7 @@ + #include "tree-dfa.h" + #include "asan.h" + #include "aarch64-feature-deps.h" ++#include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" + +@@ -7295,6 +7296,8 @@ public: + void add_reg (machine_mode, unsigned int); + void add_call_args (rtx_call_insn *); + void add_call_result (rtx_call_insn *); ++ void add_call_preserved_reg (unsigned int); ++ void add_call_preserved_regs (bitmap); + + void emit_prologue (); + void emit_epilogue (); +@@ -7427,6 +7430,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) + add_reg (GET_MODE (dest), REGNO (dest)); + } + ++/* REGNO is a register that is call-preserved under the current function's ABI. ++ Record that it must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno) ++{ ++ if (FP_REGNUM_P (regno)) ++ switch (crtl->abi->id ()) ++ { ++ case ARM_PCS_SVE: ++ add_reg (VNx16QImode, regno); ++ break; ++ case ARM_PCS_SIMD: ++ add_reg (V16QImode, regno); ++ break; ++ case ARM_PCS_AAPCS64: ++ add_reg (DImode, regno); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ else if (PR_REGNUM_P (regno)) ++ add_reg (VNx16BImode, regno); ++} ++ ++/* The hard registers in REGS are call-preserved under the current function's ++ ABI. Record that they must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs) ++{ ++ bitmap_iterator bi; ++ unsigned int regno; ++ EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi) ++ if (HARD_REGISTER_NUM_P (regno)) ++ add_call_preserved_reg (regno); ++ else ++ break; ++} ++ + /* Emit code to save registers before the mode switch. */ + + void +@@ -9825,6 +9868,23 @@ aarch64_need_old_pstate_sm () + if (aarch64_cfun_enables_pstate_sm ()) + return true; + ++ /* Non-local goto receivers are entered with PSTATE.SM equal to 0, ++ but the function needs to return with PSTATE.SM unchanged. */ ++ if (nonlocal_goto_handler_labels) ++ return true; ++ ++ /* Likewise for exception handlers. 
*/ ++ eh_landing_pad lp; ++ for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i) ++ if (lp && lp->post_landing_pad) ++ return true; ++ ++ /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call ++ streaming-compatible functions without SME being available, so PSTATE.SM ++ should only be changed if it is currently set to one. */ ++ if (crtl->has_nonlocal_goto) ++ return true; ++ + if (cfun->machine->call_switches_pstate_sm) + for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (auto *call = dyn_cast<rtx_call_insn *> (insn)) +@@ -30209,6 +30269,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + return seq; + } + ++/* BB is the target of an exception or nonlocal goto edge, which means ++ that PSTATE.SM is known to be 0 on entry. Put it into the state that ++ the current function requires. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_landing_pad (basic_block bb) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_sme_mode_switch_regs args_switch; ++ args_switch.add_call_preserved_regs (df_get_live_in (bb)); ++ args_switch.emit_prologue (); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON); ++ args_switch.emit_epilogue (); ++ if (guard_label) ++ emit_label (guard_label); ++ auto seq = get_insns (); ++ end_sequence (); ++ ++ emit_insn_after (seq, bb_note (bb)); ++ return true; ++} ++ ++/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry, ++ so arrange to make it so. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_jump (rtx_insn *jump) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF);
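A hedged sketch of the situation the patch handles, modelled on the new nonlocal_goto tests (GNU C nested functions; names are illustrative):

void run (void (*) (void));

int
outer (int *ptr) __arm_streaming
{
  __label__ failed;
  void inner (void) { *ptr += 1; goto failed; }  /* nonlocal goto */
  run (inner);
  return 1;
failed:
  /* This receiver is entered with PSTATE.SM == 0, so the pass above
     emits a switch back to streaming mode here (saving and restoring
     the live call-preserved FP/predicate registers around it) before
     the rest of outer's body runs.  */
  return 0;
}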
_service:tar_scm:0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
Added
@@ -0,0 +1,913 @@ +From c4578108ab766178fe7ebd51421c1ac9f317b675 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 115/157 BackportSME aarch64: Enforce inlining + restrictions for SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e9aa05df6c643610a3821af52eda642a525a886 + +A function that has local ZA state cannot be inlined into its caller, +since we only support managing ZA switches at function scope. + +A function whose body directly clobbers ZA state cannot be inlined into +a function with ZA state. + +A function whose body requires a particular PSTATE.SM setting can only +be inlined into a function body that guarantees that PSTATE.SM setting. +The callee's function type doesn't matter here: one locally-streaming +function can be inlined into another. + +gcc/ + * config/aarch64/aarch64.cc: Include symbol-summary.h, ipa-prop.h, + and ipa-fnsummary.h + (aarch64_function_attribute_inlinable_p): New function. + (AARCH64_IPA_SM_FIXED, AARCH64_IPA_CLOBBERS_ZA): New constants. + (aarch64_need_ipa_fn_target_info): New function. + (aarch64_update_ipa_fn_target_info): Likewise. + (aarch64_can_inline_p): Restrict the previous ISA flag checks + to non-modal features. Prevent callees that require a particular + PSTATE.SM state from being inlined into callers that can't guarantee + that state. Also prevent callees that have ZA state from being + inlined into callers that don't. Finally, prevent callees that + clobber ZA from being inlined into callers that have ZA state. + (TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P): Define. + (TARGET_NEED_IPA_FN_TARGET_INFO): Likewise. + (TARGET_UPDATE_IPA_FN_TARGET_INFO): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sme/inlining_1.c: New test. + * gcc.target/aarch64/sme/inlining_2.c: Likewise. + * gcc.target/aarch64/sme/inlining_3.c: Likewise. + * gcc.target/aarch64/sme/inlining_4.c: Likewise. + * gcc.target/aarch64/sme/inlining_5.c: Likewise. + * gcc.target/aarch64/sme/inlining_6.c: Likewise. + * gcc.target/aarch64/sme/inlining_7.c: Likewise. + * gcc.target/aarch64/sme/inlining_8.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 132 +++++++++++++++++- + .../gcc.target/aarch64/sme/inlining_1.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_10.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_11.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_12.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_13.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_14.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_15.c | 27 ++++ + .../gcc.target/aarch64/sme/inlining_2.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_3.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_4.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_5.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_6.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_7.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_8.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_9.c | 55 ++++++++ + 16 files changed, 696 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index effb567c2..eab94d5c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -85,6 +85,9 @@ + #include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -21351,6 +21354,17 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) + return ret; + } + ++/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out ++ rather than an opt-in list. */ ++ ++static bool ++aarch64_function_attribute_inlinable_p (const_tree fndecl) ++{ ++ /* A function that has local ZA state cannot be inlined into its caller, ++ since we only support managing ZA switches at function scope. */ ++ return !aarch64_fndecl_has_new_state (fndecl, "za"); ++} ++ + /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are + tri-bool options (yes, no, don't care) and the default value is + DEF, determine whether to reject inlining. */ +@@ -21372,6 +21386,60 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee, + return (callee == caller || callee == def); + } + ++/* Bit allocations for ipa_fn_summary::target_info. */ ++ ++/* Set if the function contains a stmt that relies on the function's ++ choice of PSTATE.SM setting (0 for non-streaming, 1 for streaming). ++ Not meaningful for streaming-compatible functions. 
*/ ++constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0; ++ ++/* Set if the function clobbers ZA. Not meaningful for functions that ++ have ZA state. */ ++constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1; ++ ++/* Implement TARGET_NEED_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_need_ipa_fn_target_info (const_tree, unsigned int &) ++{ ++ /* We could in principle skip this for streaming-compatible functions ++ that have ZA state, but that's a rare combination. */ ++ return true; ++} ++ ++/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) ++{ ++ if (auto *ga = dyn_cast<const gasm *> (stmt)) ++ { ++ /* We don't know what the asm does, so conservatively assume that ++ it requires the function's current SM mode. */ ++ info |= AARCH64_IPA_SM_FIXED; ++ for (unsigned int i = 0; i < gimple_asm_nclobbers (ga); ++i) ++ { ++ tree op = gimple_asm_clobber_op (ga, i); ++ const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op)); ++ if (strcmp (clobber, "za") == 0) ++ info |= AARCH64_IPA_CLOBBERS_ZA; ++ } ++ } ++ if (auto *call = dyn_cast<const gcall *> (stmt)) ++ { ++ if (gimple_call_builtin_p (call, BUILT_IN_MD)) ++ { ++ /* The attributes on AArch64 builtins are supposed to be accurate. ++ If the function isn't marked streaming-compatible then it ++ needs whichever SM mode it selects. */ ++ tree decl = gimple_call_fndecl (call); ++ if (aarch64_fndecl_pstate_sm (decl) != 0) ++ info |= AARCH64_IPA_SM_FIXED; ++ } ++ } ++ return true; ++} ++ + /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid + to inline CALLEE into CALLER based on target-specific info. + Make sure that the caller and callee have compatible architectural +@@ -21394,12 +21462,56 @@ aarch64_can_inline_p (tree caller, tree callee) + : target_option_default_node); + + /* Callee's ISA flags should be a subset of the caller's. */ +- if ((caller_opts->x_aarch64_asm_isa_flags +- & callee_opts->x_aarch64_asm_isa_flags) +- != callee_opts->x_aarch64_asm_isa_flags) ++ auto caller_asm_isa = (caller_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_asm_isa = (callee_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_asm_isa & ~caller_asm_isa) + return false; +- if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) +- != callee_opts->x_aarch64_isa_flags) ++ ++ auto caller_isa = (caller_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_isa = (callee_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_isa & ~caller_isa) ++ return false; ++ ++ /* Return true if the callee might have target_info property PROPERTY. ++ The answer must be true unless we have positive proof to the contrary. */ ++ auto callee_has_property = &(unsigned int property) ++ {
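A hedged sketch of the first restriction, modelled on the new inlining tests (__arm_new is the ACLE keyword spelling of the arm::new attribute):

__arm_new ("za") __attribute__ ((always_inline)) inline void
za_callee (void)
{
  /* Has local ZA state, so it must stay out of line: ZA switches
     are only managed at function scope.  */
}

void
za_caller (void)
{
  za_callee ();  /* GCC reports an error here: the always_inline
                    callee cannot be inlined into its caller.  */
}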
_service:tar_scm:0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
Added
@@ -0,0 +1,424 @@ +From 08b6cbe756ede25b16b8e9ff9ee32f76c4f8430f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 116/157 BackportSME aarch64: Update sibcall handling for + SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e7fee57c00ae17611651e0b057dc03b6e276b82 + +We only support tail calls between functions with the same PSTATE.ZA +setting ("private-ZA" to "private-ZA" and "shared-ZA" to "shared-ZA"). + +Only a normal non-streaming function can tail-call another non-streaming +function, and only a streaming function can tail-call another streaming +function. Any function can tail-call a streaming-compatible function. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall): + Enforce PSTATE.SM and PSTATE.ZA restrictions. + (aarch64_expand_epilogue): Save and restore the arguments + to a sibcall around any change to PSTATE.SM. + +gcc/testsuite/ + * gcc.target/aarch64/sme/sibcall_1.c: New test. + * gcc.target/aarch64/sme/sibcall_2.c: Likewise. + * gcc.target/aarch64/sme/sibcall_3.c: Likewise. + * gcc.target/aarch64/sme/sibcall_4.c: Likewise. + * gcc.target/aarch64/sme/sibcall_5.c: Likewise. + * gcc.target/aarch64/sme/sibcall_6.c: Likewise. + * gcc.target/aarch64/sme/sibcall_7.c: Likewise. + * gcc.target/aarch64/sme/sibcall_8.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 9 +++- + .../gcc.target/aarch64/sme/sibcall_1.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_2.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_3.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_4.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_5.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_6.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_7.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_8.c | 19 ++++++++ + 9 files changed, 304 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index eab94d5c2..b8e540b6e 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8660,6 +8660,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp) + if (crtl->abi->id () != expr_callee_abi (exp).id ()) + return false; + ++ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); ++ if (aarch64_fntype_pstate_sm (fntype) & ~aarch64_cfun_incoming_pstate_sm ()) ++ return false; ++ if (aarch64_fntype_pstate_za (fntype) != aarch64_cfun_incoming_pstate_za ()) ++ return false; + return true; + } + +@@ -11923,7 +11928,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, + aarch64_isa_flags); + aarch64_sme_mode_switch_regs return_switch; +- if (crtl->return_rtx && REG_P (crtl->return_rtx)) ++ if (sibcall) ++ return_switch.add_call_args (sibcall); ++ else if (crtl->return_rtx && REG_P (crtl->return_rtx)) + return_switch.add_reg (GET_MODE 
(crtl->return_rtx), + REGNO (crtl->return_rtx)); + return_switch.emit_prologue (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +new file mode 100644 +index 000000000..c7530de5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++sc_to_sc () arm::streaming_compatible ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++sc_to_s () arm::streaming_compatible ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++void ++sc_to_n () arm::streaming_compatible ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++sc_to_sc_ls () arm::streaming_compatible ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++sc_to_n_ls () arm::streaming_compatible ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +new file mode 100644 +index 000000000..8d1c8a9f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++s_to_sc () arm::streaming ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++s_to_s () arm::streaming ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tb\ts_callee} } } */ ++ ++void ++s_to_n () arm::streaming ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++s_to_sc_ls () arm::streaming ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++s_to_n_ls () arm::streaming ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +new file mode 100644 +index 000000000..2ae937fc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++n_to_sc () ++{
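The streaming-mode half of the rule is what the tests above exercise; a hedged sketch of the PSTATE.ZA half (ACLE __arm_inout keyword; illustrative names):

void private_callee (void);

void
shared_za_caller (void) __arm_inout ("za")
{
  private_callee ();  /* The caller must set up the ZA lazy save
                         before this call and commit or restore it
                         afterwards, so the call keeps its "bl" and
                         is never converted into a "b" tail call.  */
}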
_service:tar_scm:0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
Added
@@ -0,0 +1,117 @@ +From e0da78a258a34c26488b7ae623f9ae8727c2b264 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 14 Nov 2022 17:14:18 +0000 +Subject: PATCH 117/157 BackportSME libgcc: aarch64: Configure check for + .variant_pcs support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3ebb591c65b4fbe0cddd23ebc0ca2d9f7aef2cec + +Ideally SME support routines in libgcc are marked as variant PCS symbols +so check if as supports the directive. + +libgcc/ChangeLog: + + * config.in: Undef HAVE_AS_VARIANT_PCS. + * configure: Regenerate. + * configure.ac: Check for .variant_pcs. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 39 +++++++++++++++++++++++++++++++++++++++ + libgcc/configure.ac | 17 +++++++++++++++++ + 3 files changed, 59 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index f93c64a00..5dd96cdf6 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -13,6 +13,9 @@ + /* Define to 1 if the assembler supports LSE. */ + #undef HAVE_AS_LSE + ++/* Define to 1 if the assembler supports .variant_pcs. */ ++#undef HAVE_AS_VARIANT_PCS ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index 1f9b2ac57..afe02b303 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5619,6 +5619,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h + ;; + esac + ++ ++ ++case "${target}" in ++aarch64*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports .variant_pcs" >&5 ++$as_echo_n "checking if as supports .variant_pcs... " >&6; } ++if ${libgcc_cv_as_variant_pcs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ ++ cat confdefs.h - <<_ACEOF >conftest.$ac_ext ++/* end confdefs.h. */ ++ ++int ++main () ++{ ++asm (".variant_pcs foobar"); ++ ; ++ return 0; ++} ++_ACEOF ++if ac_fn_c_try_compile "$LINENO"; then : ++ libgcc_cv_as_variant_pcs=yes ++else ++ libgcc_cv_as_variant_pcs=no ++fi ++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ++ ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5 ++$as_echo "$libgcc_cv_as_variant_pcs" >&6; } ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ ++$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h ++ ++ fi ++ ;; ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index 2fc9d5d7c..abc398c91 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -648,6 +648,23 @@ changequote(,)dnl + esac) + LIBGCC_CHECK_AS_LSE + ++dnl Check if as supports .variant_pcs. ++AC_DEFUN(LIBGCC_CHECK_AS_VARIANT_PCS, ++case "${target}" in ++aarch64*-*-*) ++ AC_CACHE_CHECK(if as supports .variant_pcs, libgcc_cv_as_variant_pcs, ++ AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, ++ asm (".variant_pcs foobar");), ++ libgcc_cv_as_variant_pcs=yes, libgcc_cv_as_variant_pcs=no) ++ ) ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ AC_DEFINE(HAVE_AS_VARIANT_PCS, 1, ++ Define to 1 if the assembler supports .variant_pcs.) ++ fi ++ ;; ++esac) ++LIBGCC_CHECK_AS_VARIANT_PCS ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
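In sketch form, what the new define unlocks for the SME assembly added later in this series (the exact macro in aarch64-asm.h may be spelled differently; this only illustrates the guard):

#include "auto-target.h"   /* provides HAVE_AS_VARIANT_PCS */

#if HAVE_AS_VARIANT_PCS
/* Mark an SME helper as following a variant PCS, so linkers do not
   assume the base AAPCS64 conventions for calls to it.  */
asm (".variant_pcs __arm_sme_state");
#endif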
_service:tar_scm:0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
Added
@@ -0,0 +1,117 @@ +From 66d4035958e1dee2d16f9290004921674eb492b3 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 4 Dec 2023 10:52:52 +0000 +Subject: PATCH 118/157 BackportSME libgcc: aarch64: Configure check for + __getauxval + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 + +Add configure check for the __getauxval ABI symbol, which is always +available on aarch64 glibc, and may be available on other linux C +runtimes. For now only enabled on glibc, others have to override it + + target_configargs=libgcc_cv_have___getauxval=yes + +This is deliberately obscure as it should be auto detected, ideally +via a feature test macro in unistd.h (link time detection is not +possible since the libc may not be installed at libgcc build time), +but currently there is no such feature test mechanism. + +Without __getauxval, libgcc cannot do runtime CPU feature detection +and has to assume only the build time known features are available. + +libgcc/ChangeLog: + + * config.in: Undef HAVE___GETAUXVAL. + * configure: Regenerate. + * configure.ac: Check for __getauxval. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 26 ++++++++++++++++++++++++++ + libgcc/configure.ac | 19 +++++++++++++++++++ + 3 files changed, 48 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index 5dd96cdf6..441d4d39b 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -16,6 +16,9 @@ + /* Define to 1 if the assembler supports .variant_pcs. */ + #undef HAVE_AS_VARIANT_PCS + ++/* Define to 1 if __getauxval is available. */ ++#undef HAVE___GETAUXVAL ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index afe02b303..a874ef57e 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5658,6 +5658,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h + ;; + esac + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5 ++$as_echo_n "checking for __getauxval... " >&6; } ++if ${libgcc_cv_have___getauxval+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" >&5 ++$as_echo "$libgcc_cv_have___getauxval" >&6; } ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ ++$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h ++ ++ fi ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index abc398c91..64b45ae14 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -665,6 +665,25 @@ aarch64*-*-*) + esac) + LIBGCC_CHECK_AS_VARIANT_PCS + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. 
++ AC_CACHE_CHECK(for __getauxval, ++ libgcc_cv_have___getauxval, ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac) ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ AC_DEFINE(HAVE___GETAUXVAL, 1, ++ Define to 1 if __getauxval is available.) ++ fi ++esac ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
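What the define unlocks, in sketch form (the constants are written out because <sys/auxv.h> cannot be assumed present when libgcc is built; the SME patch that follows uses this same idiom):

unsigned long int __getauxval (unsigned long int);

#define AT_HWCAP2 26
#define HWCAP2_SME (1 << 23)

static _Bool
sme_accessible (void)
{
  /* Ask the C runtime for the kernel's AT_HWCAP2 bits and test the
     SME feature bit.  */
  return (__getauxval (AT_HWCAP2) & HWCAP2_SME) != 0;
}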
_service:tar_scm:0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
Added
@@ -0,0 +1,627 @@ +From 1e111ac2d71c5469dc526559de009542acaeb16f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 15 Nov 2022 14:08:55 +0000 +Subject: PATCH 119/157 BackportSME libgcc: aarch64: Add SME runtime + support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=328c17af772207cb03740809c05ba2c3abfb86be + +The call ABI for SME (Scalable Matrix Extension) requires a number of +helper routines which are added to libgcc so they are tied to the +compiler version instead of the libc version. See +https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines + +The routines are in shared libgcc and static libgcc eh, even though +they are not related to exception handling. This is to avoid linking +a copy of the routines into dynamic linked binaries, because TPIDR2_EL0 +block can be extended in the future which is better to handle in a +single place per process. + +The support routines have to decide if SME is accessible or not. Linux +tells userspace if SME is accessible via AT_HWCAP2, otherwise a new +__aarch64_sme_accessible symbol was introduced that a libc can define. +Due to libgcc and libc build order, the symbol availability cannot be +checked so for __aarch64_sme_accessible an unistd.h feature test macro +is used while such detection mechanism is not available for __getauxval +so we rely on configure checks based on the target triplet. + +Asm helper code is added to make writing the routines easier. + +libgcc/ChangeLog: + + * config/aarch64/t-aarch64: Add sources to the build. + * config/aarch64/__aarch64_have_sme.c: New file. + * config/aarch64/__arm_sme_state.S: New file. + * config/aarch64/__arm_tpidr2_restore.S: New file. + * config/aarch64/__arm_tpidr2_save.S: New file. + * config/aarch64/__arm_za_disable.S: New file. + * config/aarch64/aarch64-asm.h: New file. + * config/aarch64/libgcc-sme.ver: New file. +--- + libgcc/config/aarch64/__aarch64_have_sme.c | 75 ++++++++++++++ + libgcc/config/aarch64/__arm_sme_state.S | 55 ++++++++++ + libgcc/config/aarch64/__arm_tpidr2_restore.S | 89 ++++++++++++++++ + libgcc/config/aarch64/__arm_tpidr2_save.S | 101 +++++++++++++++++++ + libgcc/config/aarch64/__arm_za_disable.S | 65 ++++++++++++ + libgcc/config/aarch64/aarch64-asm.h | 98 ++++++++++++++++++ + libgcc/config/aarch64/libgcc-sme.ver | 24 +++++ + libgcc/config/aarch64/t-aarch64 | 10 ++ + 8 files changed, 517 insertions(+) + create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c + create mode 100644 libgcc/config/aarch64/__arm_sme_state.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S + create mode 100644 libgcc/config/aarch64/__arm_za_disable.S + create mode 100644 libgcc/config/aarch64/aarch64-asm.h + create mode 100644 libgcc/config/aarch64/libgcc-sme.ver + +diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c b/libgcc/config/aarch64/__aarch64_have_sme.c +new file mode 100644 +index 000000000..5e6492462 +--- /dev/null ++++ b/libgcc/config/aarch64/__aarch64_have_sme.c +@@ -0,0 +1,75 @@ ++/* Initializer for SME support. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. 
++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "auto-target.h" ++ ++#ifndef inhibit_libc ++/* For libc feature test macros. */ ++# include <unistd.h> ++#endif ++ ++#if __ARM_FEATURE_SME ++/* Avoid runtime SME detection if libgcc is built with SME. */ ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 1 ++#elif HAVE___GETAUXVAL ++/* SME access detection on Linux. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR sme_accessible () ++ ++# define AT_HWCAP2 26 ++# define HWCAP2_SME (1 << 23) ++unsigned long int __getauxval (unsigned long int); ++ ++static _Bool ++sme_accessible (void) ++{ ++ unsigned long hwcap2 = __getauxval (AT_HWCAP2); ++ return (hwcap2 & HWCAP2_SME) != 0; ++} ++#elif __LIBC___AARCH64_SME_ACCESSIBLE ++/* Alternative SME access detection. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR __aarch64_sme_accessible () ++_Bool __aarch64_sme_accessible (void); ++#else ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 0 ++#endif ++ ++/* Define the symbol gating SME support in libgcc. */ ++HAVE_SME_CONST _Bool __aarch64_have_sme ++ __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE; ++ ++#ifdef HAVE_SME_CTOR ++/* Use a higher priority to ensure it runs before user constructors ++ with priority 100. */ ++static void __attribute__((constructor (90))) ++init_have_sme (void) ++{ ++ __aarch64_have_sme = HAVE_SME_CTOR; ++} ++#endif +diff --git a/libgcc/config/aarch64/__arm_sme_state.S b/libgcc/config/aarch64/__arm_sme_state.S +new file mode 100644 +index 000000000..c4e16cac0 +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_sme_state.S +@@ -0,0 +1,55 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Query SME state. Call ABI: ++ - Private ZA, streaming-compatible. ++ - x2-x15, x19-x29, sp and fp regs are call preserved. ++ - Takes no argument. 
++ - Returns SME state in x0 and TPIDR2_EL0 in x1. */ ++ ++.hidden __aarch64_have_sme ++ ++variant_pcs (__arm_sme_state) ++ ++ENTRY (__arm_sme_state) ++ /* Check if SME is available. */ ++ adrp x1, __aarch64_have_sme ++ ldrb w1, x1, :lo12:__aarch64_have_sme ++ cbz w1, L(nosme) ++ ++ /* Expose the bottom 2 bits of svcr (SM, ZA) in x0 and set the ++ top 2 bits indicating that SME and TPIDR2_EL0 are available. */ ++ .inst 0xd53b4240 /* mrs x0, svcr */ ++ .inst 0xd53bd0a1 /* mrs x1, tpidr2_el0 */ ++ and x0, x0, 3 ++ orr x0, x0, 0xc000000000000000 ++ ret ++ ++L(nosme): ++ mov x0, 0 ++ mov x1, 0 ++ ret ++END (__arm_sme_state)
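A hedged sketch of the libc-side alternative mentioned above: a C library that knows whether SME is accessible can define the fallback symbol (and advertise it through a feature-test macro in <unistd.h>):

/* Supplied by the libc, not by libgcc; the constant result below is
   a placeholder only.  */
_Bool
__aarch64_sme_accessible (void)
{
  /* Return nonzero only when the kernel lets this process use SME
     (streaming mode and the ZA storage).  */
  return 0;
}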
_service:tar_scm:0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
Added
@@ -0,0 +1,70 @@ +From 310c8b5aaedad1430146fed9d8992201278164a6 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 29 Sep 2023 13:55:51 +0100 +Subject: PATCH 120/157 BackportSME libgcc: aarch64: Add SME unwinder + support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=91d68665b8b7a5dffd0bbf8cd1f74c3c41d4c2d8 + +To support the ZA lazy save scheme, the PCS requires the unwinder to +reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry +to an exception handler. We use the __arm_za_disable SME runtime call +unconditionally to achieve this. +https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions + +The hidden alias is used to avoid a PLT and avoid inconsistent VPCS +marking (we don't rely on special PCS at the call site). In case of +static linking the SME runtime init code is linked in code that raises +exceptions. + +libgcc/ChangeLog: + + * config/aarch64/__arm_za_disable.S: Add hidden alias. + * config/aarch64/aarch64-unwind.h: Reset the SME state before + EH return via the _Unwind_Frames_Extra hook. +--- + libgcc/config/aarch64/__arm_za_disable.S | 5 +++++ + libgcc/config/aarch64/aarch64-unwind.h | 16 ++++++++++++++++ + 2 files changed, 21 insertions(+) + +diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S +index cff5b9cec..03fc28a39 100644 +--- a/libgcc/config/aarch64/__arm_za_disable.S ++++ b/libgcc/config/aarch64/__arm_za_disable.S +@@ -63,3 +63,8 @@ ENTRY (__arm_za_disable) + L(end): + ret + END (__arm_za_disable) ++ ++/* Hidden alias used by the unwinder. */ ++.global __libgcc_arm_za_disable ++.hidden __libgcc_arm_za_disable ++.set __libgcc_arm_za_disable, __arm_za_disable +diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h +index 40b22d3c2..bfa695dcb 100644 +--- a/libgcc/config/aarch64/aarch64-unwind.h ++++ b/libgcc/config/aarch64/aarch64-unwind.h +@@ -87,4 +87,20 @@ aarch64_frob_update_context (struct _Unwind_Context *context, + return; + } + ++/* SME runtime function local to libgcc, streaming compatible ++ and preserves more registers than the base PCS requires, but ++ we don't rely on that here. */ ++__attribute__ ((visibility ("hidden"))) ++void __libgcc_arm_za_disable (void); ++ ++/* Disable the SME ZA state in case an unwound frame used the ZA ++ lazy saving scheme. */ ++#undef _Unwind_Frames_Extra ++#define _Unwind_Frames_Extra(x) \ ++ do \ ++ { \ ++ __libgcc_arm_za_disable (); \ ++ } \ ++ while (0) ++ + #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */ +-- +2.33.0 +
_service:tar_scm:0220-Backport-SME-libgcc-Fix-config.in.patch
Added
@@ -0,0 +1,51 @@ +From b20b75158d1230a8b6cbabb36e3b128cbd9ec86f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Fri, 8 Dec 2023 12:22:54 +0000 +Subject: PATCH 121/157 BackportSME libgcc: Fix config.in + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47575ec9edcd3078f066aa54ba428420be796bef + +It was updated incorrectly in + + commit dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 + Author: Szabolcs Nagy <szabolcs.nagy@arm.com> + CommitDate: 2023-12-08 11:29:06 +0000 + + libgcc: aarch64: Configure check for __getauxval + +so regenerate it. + +libgcc/ChangeLog: + + * config.in: Regenerate. +--- + libgcc/config.in | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/libgcc/config.in b/libgcc/config.in +index 441d4d39b..8f7dd437b 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -16,9 +16,6 @@ + /* Define to 1 if the assembler supports .variant_pcs. */ + #undef HAVE_AS_VARIANT_PCS + +-/* Define to 1 if __getauxval is available. */ +-#undef HAVE___GETAUXVAL +- + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +@@ -67,6 +64,9 @@ + /* Define to 1 if you have the <unistd.h> header file. */ + #undef HAVE_UNISTD_H + ++/* Define to 1 if __getauxval is available. */ ++#undef HAVE___GETAUXVAL ++ + /* Define to the address where bug reports for this package should be sent. */ + #undef PACKAGE_BUGREPORT + +-- +2.33.0 +
_service:tar_scm:0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
Added
@@ -0,0 +1,54 @@ +From 0214ca06a182481851ed90aae21f460f87d26084 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 10 Dec 2023 19:46:05 +0000 +Subject: PATCH 122/157 BackportSME aarch64: Add -funwind-tables to some + tests + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=02ecdaab7a50f4505fd905effb6d238d773dc813 + +The .cfi scans in these tests failed for *-elf targets because +those targets don't enable .eh_frame info by default. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables. + * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. +--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c | 2 +- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c | 2 +- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +index a2de55773..98922aaea 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + void ns_callee (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +index ed999d085..4250fe798 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + __attribute__((aarch64_vector_pcs)) void ns_callee (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +index be9b5cc04..e3d9bc274 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -1,4 +1,4 @@ +-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } + // { dg-final { check-function-bodies "**" "" } } + + #include <arm_sve.h> +-- +2.33.0 +
_service:tar_scm:0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
Added
@@ -0,0 +1,106 @@ +From cc2e901eccd40992432f74270a9ebc1b708b6eb1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 10 Dec 2023 19:46:05 +0000 +Subject: PATCH 123/157 BackportSME aarch64: Skip some SME register save + tests on BE + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=23ea0bc2cf042d74c4adfe26a57cf96b1d837a91 + +Big-endian targets need to save Z8-Z15 in the same order as +the registers would appear for D8-D15, because the layout is +mandated by the EH ABI. BE targets therefore use ST1D instead +of the normal STR for those registers (but not for others). + +That difference is already tested elsewhere and isn't important +for the SME tests. This patch therefore restricts the affected +tests to LE. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that + contain Z8-Z23 saves to little-endian. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. +--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +index e3d9bc274..6238ab80d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -14,7 +14,7 @@ struct callbacks { + }; + + /* +-** n_caller: { target lp64 } ++** n_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -114,7 +114,7 @@ n_caller (struct callbacks *c) + } + + /* +-** s_caller: { target lp64 } ++** s_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -214,7 +214,7 @@ s_caller (struct callbacks *c) arm::streaming + } + + /* +-** sc_caller: ++** sc_caller: { target aarch64_little_endian } + ** stp x29, x30, \sp, #?-32\! + ** mov x29, sp + ** cntd x16 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +index f44724df3..c909b34ff 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +@@ -7,7 +7,7 @@ svint8_t produce_z0 (); + void consume_z0 (svint8_t); + + /* +-** test_z0: ++** test_z0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z0 +@@ -32,7 +32,7 @@ svint8x4_t produce_z3 (); + void consume_z3 (svint8x4_t); + + /* +-** test_z3: ++** test_z3: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z3 +@@ -61,7 +61,7 @@ svbool_t produce_p0 (); + void consume_p0 (svbool_t); + + /* +-** test_p0: ++** test_p0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_p0 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +index 20ff4b87d..4bb637f47 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +@@ -265,7 +265,7 @@ n_ls_vector_pcs () + } + + /* +-** n_ls_sve_pcs: ++** n_ls_sve_pcs: { target aarch64_little_endian } + ** sub sp, sp, #?16 + ** cntd x16 + ** str x16, \sp\ +-- +2.33.0 +
_service:tar_scm:0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
Added
@@ -0,0 +1,37 @@ +From ab7a2c3b74c65d62d661621c56ef984cfb72f985 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:50 +0100 +Subject: PATCH 124/157 BackportSME Add OPTIONS_H_EXTRA to GTFILES + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1e1fa054970a30844eb94d726b4954dcb8b9063 + +I have a patch that adds a typedef to aarch64's <cpu>-opts.h. +The typedef is used for a TargetVariable in the .opt file, +which means that it is covered by PCH and so needs to be +visible to gengtype. + +<cpu>-opts.h is not included directly in tm.h, but indirectly +by target headers (in this case aarch64.h). There was therefore +nothing that caused it to be added to GTFILES. + +gcc/ + * Makefile.in (GTFILES): Add OPTIONS_H_EXTRA. +--- + gcc/Makefile.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 5cd838270..fcfa54697 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -2648,6 +2648,7 @@ s-match: build/genmatch$(build_exeext) $(srcdir)/match.pd cfn-operators.pd + + GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ + $(host_xm_file_list) \ ++ $(OPTIONS_H_EXTRA) \ + $(tm_file_list) $(HASHTAB_H) $(SPLAY_TREE_H) $(srcdir)/bitmap.h \ + $(srcdir)/wide-int.h $(srcdir)/alias.h \ + $(srcdir)/coverage.cc $(srcdir)/rtl.h \ +-- +2.33.0 +
_service:tar_scm:0224-Backport-SME-aarch64-Add-V1DI-mode.patch
Added
@@ -0,0 +1,177 @@ +From 21f9190106f8324be42e3e8e0510467386dd68a0 Mon Sep 17 00:00:00 2001 +From: Andrew Carlotti <andrew.carlotti@arm.com> +Date: Fri, 15 Jul 2022 15:25:53 +0100 +Subject: PATCH 125/157 BackportSME aarch64: Add V1DI mode + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ba864c5d11a1c20891a1e054cb7814ec23de5c9 + +We already have a V1DF mode, so this makes the vector modes more consistent. + +Additionally, this allows us to recognise uint64x1_t and int64x1_t types given +only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type). + +gcc/ChangeLog: + + * config/aarch64/aarch64-builtins.cc + (v1di_UP): Add V1DI mode to _UP macros. + * config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode. + * config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode. + * config/aarch64/aarch64-simd.md + (vec_extractv2dfv1df): Replace with... + (vec_extract<mode><V1half>): ...this. + * config/aarch64/aarch64.cc + (aarch64_classify_vector_mode): Add V1DI mode. + * config/aarch64/iterators.md + (VQ_2E, V1HALF, V1half): New. + (nunits): Add V1DI mode. +--- + gcc/config/aarch64/aarch64-builtins.cc | 1 + + gcc/config/aarch64/aarch64-modes.def | 1 + + gcc/config/aarch64/aarch64-simd-builtin-types.def | 6 +++--- + gcc/config/aarch64/aarch64-simd.md | 14 +++++++------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/iterators.md | 14 ++++++++++++-- + 6 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 015e9d975..37bb3af48 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -55,6 +55,7 @@ + #define v2si_UP E_V2SImode + #define v2sf_UP E_V2SFmode + #define v1df_UP E_V1DFmode ++#define v1di_UP E_V1DImode + #define di_UP E_DImode + #define df_UP E_DFmode + #define v16qi_UP E_V16QImode +diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def +index 8fa66fdb3..dd74da4b3 100644 +--- a/gcc/config/aarch64/aarch64-modes.def ++++ b/gcc/config/aarch64/aarch64-modes.def +@@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ + VECTOR_MODES (FLOAT, 8); /* V2SF. */ + VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ ++VECTOR_MODE (INT, DI, 1); /* V1DI. */ + VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ + VECTOR_MODE (FLOAT, HF, 2); /* V2HF. 
*/ + +diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def +index 248e51e96..405455814 100644 +--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def ++++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def +@@ -24,7 +24,7 @@ + ENTRY (Int16x8_t, V8HI, none, 11) + ENTRY (Int32x2_t, V2SI, none, 11) + ENTRY (Int32x4_t, V4SI, none, 11) +- ENTRY (Int64x1_t, DI, none, 11) ++ ENTRY (Int64x1_t, V1DI, none, 11) + ENTRY (Int64x2_t, V2DI, none, 11) + ENTRY (Uint8x8_t, V8QI, unsigned, 11) + ENTRY (Uint8x16_t, V16QI, unsigned, 12) +@@ -32,7 +32,7 @@ + ENTRY (Uint16x8_t, V8HI, unsigned, 12) + ENTRY (Uint32x2_t, V2SI, unsigned, 12) + ENTRY (Uint32x4_t, V4SI, unsigned, 12) +- ENTRY (Uint64x1_t, DI, unsigned, 12) ++ ENTRY (Uint64x1_t, V1DI, unsigned, 12) + ENTRY (Uint64x2_t, V2DI, unsigned, 12) + ENTRY (Poly8_t, QI, poly, 9) + ENTRY (Poly16_t, HI, poly, 10) +@@ -42,7 +42,7 @@ + ENTRY (Poly8x16_t, V16QI, poly, 12) + ENTRY (Poly16x4_t, V4HI, poly, 12) + ENTRY (Poly16x8_t, V8HI, poly, 12) +- ENTRY (Poly64x1_t, DI, poly, 12) ++ ENTRY (Poly64x1_t, V1DI, poly, 12) + ENTRY (Poly64x2_t, V2DI, poly, 12) + ENTRY (Float16x4_t, V4HF, none, 13) + ENTRY (Float16x8_t, V8HF, none, 13) +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 62493cdfa..04592fc90 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -8326,16 +8326,16 @@ + }) + + ;; Extract a single-element 64-bit vector from one half of a 128-bit vector. +-(define_expand "vec_extractv2dfv1df" +- (match_operand:V1DF 0 "register_operand") +- (match_operand:V2DF 1 "register_operand") ++(define_expand "vec_extract<mode><V1half>" ++ (match_operand:<V1HALF> 0 "register_operand") ++ (match_operand:VQ_2E 1 "register_operand") + (match_operand 2 "immediate_operand") + "TARGET_SIMD" + { +- /* V1DF is rarely used by other patterns, so it should be better to hide +- it in a subreg destination of a normal DF op. */ +- rtx scalar0 = gen_lowpart (DFmode, operands0); +- emit_insn (gen_vec_extractv2dfdf (scalar0, operands1, operands2)); ++ /* V1DI and V1DF are rarely used by other patterns, so it should be better ++ to hide it in a subreg destination of a normal DI or DF op. */ ++ rtx scalar0 = gen_lowpart (<VHALF>mode, operands0); ++ emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands1, operands2)); + DONE; + }) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b8e540b6e..f7285555b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -4117,7 +4117,7 @@ aarch64_classify_vector_mode (machine_mode mode) + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: +- /* ...E_V1DImode doesn't exist. */ ++ case E_V1DImode: + case E_V4HFmode: + case E_V4BFmode: + case E_V2SFmode: +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 152d28f6b..94db8c53f 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -138,6 +138,9 @@ + ;; VQ without 2 element modes. + (define_mode_iterator VQ_NO2E V16QI V8HI V4SI V8HF V4SF V8BF) + ++;; 2 element quad vector modes. ++(define_mode_iterator VQ_2E V2DI V2DF) ++ + ;; BFmode vector modes. 
+ (define_mode_iterator VBF V4BF V8BF) + +@@ -1116,12 +1119,13 @@ + (define_mode_attr nunits (V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") +- (V2DI "2") (V8DI "8") ++ (V1DI "1") (V2DI "2") + (V4HF "4") (V8HF "8") + (V4BF "4") (V8BF "8") + (V2SF "2") (V4SF "4") + (V1DF "1") (V2DF "2") +- (DI "1") (DF "1")) ++ (DI "1") (DF "1") ++ (V8DI "8")) + + ;; Map a mode to the number of bits in it, if the size of the mode + ;; is constant. +@@ -1501,6 +1505,12 @@ + (V2DI "di") (V2SF "sf") + (V4SF "v2sf") (V2DF "df")) + ++;; Single-element half modes of quad vector modes. ++(define_mode_attr V1HALF (V2DI "V1DI") (V2DF "V1DF")) ++ ++;; Single-element half modes of quad vector modes, in lower-case ++(define_mode_attr V1half (V2DI "v1di") (V2DF "v1df")) ++ + ;; Double modes of vector modes. + (define_mode_attr VDBL (V8QI "V16QI") (V4HI "V8HI") + (V4HF "V8HF") (V4BF "V8BF") +-- +2.33.0 +
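At the source level the new mode corresponds to the single-lane 64-bit vector types; a minimal sketch using a standard arm_neon.h intrinsic:

#include <arm_neon.h>

int64x1_t
low_half (int64x2_t v)
{
  /* int64x1_t is now a V1DImode value, so single-lane extracts like
     this one are served by the generalised
     vec_extract<mode><V1half> pattern above.  */
  return vget_low_s64 (v);
}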
_service:tar_scm:0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
Added
@@ -0,0 +1,217 @@ +From eaea26e2218ee61a9be0e2933548c752167dcdb5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Fri, 10 Nov 2023 15:46:21 +0000 +Subject: PATCH 126/157 BackportSME Allow md iterators to include other + iterators + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5dbaf4851bbf56b6176dca1f1e7d38a16b5b84ee + +This patch allows an .md iterator to include the contents of +previous iterators, possibly with an extra condition attached. + +Too much indirection might become hard to follow, so for the +AArch64 changes I tried to stick to things that seemed likely +to be uncontroversial: + +(a) structure iterators that combine modes for different sizes + and vector counts + +(b) iterators that explicitly duplicate another iterator + (for iterating over the cross product) + +gcc/ + * read-rtl.cc (md_reader::read_mapping): Allow iterators to + include other iterators. + * doc/md.texi: Document the change. + * config/aarch64/iterators.md (DREG2, VQ2, TX2, DX2, SX2): Include + the iterator that is being duplicated, rather than reproducing it. + (VSTRUCT_D): Redefine using VSTRUCT_234D. + (VSTRUCT_Q): Likewise VSTRUCT_234Q. + (VSTRUCT_2QD, VSTRUCT_3QD, VSTRUCT_4QD, VSTRUCT_QD): Redefine using + the individual D and Q iterators. +--- + gcc/config/aarch64/iterators.md | 58 ++++++++------------------------- + gcc/doc/md.texi | 13 ++++++++ + gcc/read-rtl.cc | 21 ++++++++++-- + 3 files changed, 46 insertions(+), 46 deletions(-) + +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 94db8c53f..a1659dfba 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -106,7 +106,7 @@ + (define_mode_iterator DREG V8QI V4HI V4HF V2SI V2SF DF) + + ;; Copy of the above. +-(define_mode_iterator DREG2 V8QI V4HI V4HF V2SI V2SF DF) ++(define_mode_iterator DREG2 DREG) + + ;; All modes suitable to store/load pair (2 elements) using STP/LDP. + (define_mode_iterator VP_2E V2SI V2SF V2DI V2DF) +@@ -121,7 +121,7 @@ + (define_mode_iterator VQ V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF) + + ;; Copy of the above. +-(define_mode_iterator VQ2 V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) ++(define_mode_iterator VQ2 VQ) + + ;; Quad vector modes suitable for moving. Includes BFmode. + (define_mode_iterator VQMOV V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) +@@ -321,14 +321,6 @@ + ;; Advanced SIMD opaque structure modes. + (define_mode_iterator VSTRUCT OI CI XI) + +-;; Advanced SIMD 64-bit vector structure modes. +-(define_mode_iterator VSTRUCT_D V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF) +- + ;; Advanced SIMD 64-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2D V2x8QI V2x4HI V2x2SI V2x1DI + V2x4HF V2x2SF V2x1DF V2x4BF) +@@ -341,6 +333,9 @@ + (define_mode_iterator VSTRUCT_4D V4x8QI V4x4HI V4x2SI V4x1DI + V4x4HF V4x2SF V4x1DF V4x4BF) + ++;; Advanced SIMD 64-bit vector structure modes. ++(define_mode_iterator VSTRUCT_D VSTRUCT_2D VSTRUCT_3D VSTRUCT_4D) ++ + ;; Advanced SIMD 64-bit 2-vector structure modes minus V2x1DI and V2x1DF. + (define_mode_iterator VSTRUCT_2DNX V2x8QI V2x4HI V2x2SI V2x4HF + V2x2SF V2x4BF) +@@ -365,14 +360,6 @@ + ;; Advanced SIMD 64-bit 4-vector structure modes with 64-bit elements. + (define_mode_iterator VSTRUCT_4DX V4x1DI V4x1DF) + +-;; Advanced SIMD 128-bit vector structure modes. 
+-(define_mode_iterator VSTRUCT_Q V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) +- + ;; Advanced SIMD 128-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2Q V2x16QI V2x8HI V2x4SI V2x2DI + V2x8HF V2x4SF V2x2DF V2x8BF) +@@ -385,49 +372,32 @@ + (define_mode_iterator VSTRUCT_4Q V4x16QI V4x8HI V4x4SI V4x2DI + V4x8HF V4x4SF V4x2DF V4x8BF) + ++;; Advanced SIMD 128-bit vector structure modes. ++(define_mode_iterator VSTRUCT_Q VSTRUCT_2Q VSTRUCT_3Q VSTRUCT_4Q) ++ + ;; Advanced SIMD 2-vector structure modes. +-(define_mode_iterator VSTRUCT_2QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF) ++(define_mode_iterator VSTRUCT_2QD VSTRUCT_2D VSTRUCT_2Q) + + ;; Advanced SIMD 3-vector structure modes. +-(define_mode_iterator VSTRUCT_3QD V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF) ++(define_mode_iterator VSTRUCT_3QD VSTRUCT_3D VSTRUCT_3Q) + + ;; Advanced SIMD 4-vector structure modes. +-(define_mode_iterator VSTRUCT_4QD V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_4QD VSTRUCT_4D VSTRUCT_4Q) + + ;; Advanced SIMD vector structure modes. +-(define_mode_iterator VSTRUCT_QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_QD VSTRUCT_D VSTRUCT_Q) + + ;; Double scalar modes + (define_mode_iterator DX DI DF DD) + + ;; Duplicate of the above +-(define_mode_iterator DX2 DI DF DD) ++(define_mode_iterator DX2 DX) + + ;; Single scalar modes + (define_mode_iterator SX SI SF) + + ;; Duplicate of the above +-(define_mode_iterator SX2 SI SF) ++(define_mode_iterator SX2 SX) + + ;; Single and double integer and float modes + (define_mode_iterator DSX DF DI SF SI) +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 04ace8f7f..c0cf0ec64 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -11561,6 +11561,19 @@ to @code{@var{mode}}. For example: + means that the @code{:DI} expansion only applies if @code{TARGET_64BIT} + but that the @code{:SI} expansion has no such constraint. + ++It is also possible to include iterators in other iterators. For example: ++ ++@smallexample ++(define_mode_iterator VI V16QI V8HI V4SI V2DI) ++(define_mode_iterator VF V8HF V4SF (V2DF "TARGET_DOUBLE")) ++(define_mode_iterator V VI (VF "TARGET_FLOAT")) ++@end smallexample ++ ++makes @samp{:V} iterate over the modes in @code{VI} and the modes ++in @code{VF}. When a construct uses @samp{:V}, the @code{V8HF} and ++@code{V4SF} expansions require @samp{TARGET_FLOAT} while the @code{V2DF} ++expansion requires @samp{TARGET_DOUBLE && TARGET_FLOAT}. ++ + Iterators are applied in the order they are defined. This can be + significant if two iterators are used in a construct that requires + substitutions. @xref{Substitutions}. 
+diff --git a/gcc/read-rtl.cc b/gcc/read-rtl.cc +index 798d24859..cdfa9e7b8 100644 +--- a/gcc/read-rtl.cc ++++ b/gcc/read-rtl.cc +@@ -1261,8 +1261,25 @@ md_reader::read_mapping (struct iterator_group *group, htab_t table) + string = read_string (false); + require_char_ws (')'); + } +- number = group->find_builtin (name.string); +- end_ptr = add_map_value (end_ptr, number, string); ++ auto *subm = (struct mapping *) htab_find (group->iterators, ++ &name.string); ++ if (subm) ++ { ++ if (m == subm) ++ fatal_with_file_and_line ("recursive definition of `%s'", ++ name.string);
View file
_service:tar_scm:0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
Added
@@ -0,0 +1,142 @@ +From 637e6469f2225b6f6f6b0c84b4e7abcd8dfd7ca4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christoph=20M=C3=BCllner?= <christoph.muellner@vrull.eu> +Date: Wed, 28 Sep 2022 11:19:06 +0200 +Subject: [PATCH 127/157] [Backport][SME] riscv: Add support for strlen inline + expansion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df48285b2484eb4f8e0570c566677114eb0e553a + +Note: Only introduce the definitions of function +emit_likely_jump_insn and emit_unlikely_jump_insn, +and drop others. + +This patch implements the expansion of the strlen builtin for RV32/RV64 +for xlen-aligned aligned strings if Zbb or XTheadBb instructions are available. +The inserted sequences are: + +rv32gc_zbb (RV64 is similar): + add a3,a0,4 + li a4,-1 +.L1: lw a5,0(a0) + add a0,a0,4 + orc.b a5,a5 + beq a5,a4,.L1 + not a5,a5 + ctz a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a3 + +rv64gc_xtheadbb (RV32 is similar): + add a4,a0,8 +.L2: ld a5,0(a0) + add a0,a0,8 + th.tstnbz a5,a5 + beqz a5,.L2 + th.rev a5,a5 + th.ff1 a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a4 + +This allows to inline calls to strlen(), with optimized code for +xlen-aligned strings, resulting in the following benefits over +a call to libc: +* no call/ret instructions +* no stack frame allocation +* no register saving/restoring +* no alignment test + +The inlining mechanism is gated by a new switch ('-minline-strlen') +and by the variable 'optimize_size'. + +Tested using the glibc string tests. + +Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu> + +gcc/ChangeLog: + + * config.gcc: Add new object riscv-string.o. + riscv-string.cc. + * config/riscv/riscv-protos.h (riscv_expand_strlen): + New function. + * config/riscv/riscv.md (strlen<mode>): New expand INSN. + * config/riscv/riscv.opt: New flag 'minline-strlen'. + * config/riscv/t-riscv: Add new object riscv-string.o. + * config/riscv/thead.md (th_rev<mode>2): Export INSN name. + (th_rev<mode>2): Likewise. + (th_tstnbz<mode>2): New INSN. + * doc/invoke.texi: Document '-minline-strlen'. + * emit-rtl.cc (emit_likely_jump_insn): New helper function. + (emit_unlikely_jump_insn): Likewise. + * rtl.h (emit_likely_jump_insn): New prototype. + (emit_unlikely_jump_insn): Likewise. + * config/riscv/riscv-string.cc: New file. + +gcc/testsuite/ChangeLog: + + * gcc.target/riscv/xtheadbb-strlen-unaligned.c: New test. + * gcc.target/riscv/xtheadbb-strlen.c: New test. + * gcc.target/riscv/zbb-strlen-disabled-2.c: New test. + * gcc.target/riscv/zbb-strlen-disabled.c: New test. + * gcc.target/riscv/zbb-strlen-unaligned.c: New test. + * gcc.target/riscv/zbb-strlen.c: New test. +--- + gcc/emit-rtl.cc | 24 ++++++++++++++++++++++++ + gcc/rtl.h | 2 ++ + 2 files changed, 26 insertions(+) + +diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc +index 1e02ae254..2df5ff414 100644 +--- a/gcc/emit-rtl.cc ++++ b/gcc/emit-rtl.cc +@@ -5163,6 +5163,30 @@ emit_jump_insn (rtx x) + return last; + } + ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very likely probability, ++ and add it to the end of the doubly-linked list. 
*/ ++ ++rtx_insn * ++emit_likely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_likely ()); ++ return jump; ++} ++ ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very unlikely probability, ++ and add it to the end of the doubly-linked list. */ ++ ++rtx_insn * ++emit_unlikely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); ++ return jump; ++} ++ + /* Make an insn of code CALL_INSN with pattern X + and add it to the end of the doubly-linked list. */ + +diff --git a/gcc/rtl.h b/gcc/rtl.h +index 488016bb4..a0db225cb 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *); + extern rtx_insn *emit_insn (rtx); + extern rtx_insn *emit_debug_insn (rtx); + extern rtx_insn *emit_jump_insn (rtx); ++extern rtx_insn *emit_likely_jump_insn (rtx); ++extern rtx_insn *emit_unlikely_jump_insn (rtx); + extern rtx_insn *emit_call_insn (rtx); + extern rtx_code_label *emit_label (rtx); + extern rtx_jump_table_data *emit_jump_table_data (rtx); +-- +2.33.0 +
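The two helpers are thin wrappers around emit_jump_insn that attach a REG_BR_PROB note. A minimal sketch of how a backend expander might use them (not from the patch; the function, condition and label here are hypothetical):

/* Emit the loop-back branch of a search loop; it is taken on most
   iterations, so record it as very likely for later passes.  */
static void
emit_loop_back_branch (rtx cond, rtx_code_label *label)
{
  rtx ref = gen_rtx_LABEL_REF (Pmode, label);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, ref, pc_rtx);
  /* Same as emit_jump_insn, plus a REG_BR_PROB note carrying
     profile_probability::very_likely ().  */
  emit_likely_jump_insn (gen_rtx_SET (pc_rtx, ite));
}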
View file
_service:tar_scm:0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
Added
@@ -0,0 +1,189 @@ +From 8c6ffb4c6f86231eee318ceeb8546a53037edfe9 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Tue, 4 Oct 2022 23:13:15 +0200 +Subject: [PATCH 128/157] [Backport][SME] attribs: Add overloads with namespace + name + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0764dc8537a4f87089ecd32391cb5f8803b43c96 + +I've discovered a problem with the way we handle scoped attributes. For +declaration or type attributes for attributes we don't know anything about +we just don't add them to the declarations or types, so later in the FEs and +middle-end it is fine to use lookup_attribute etc. which just check the +attribute name and not namespace because non-standard non-GNU attributes +just won't show there. But in the case of attributes on statements, nothing +has filtered out the unknown attributes, so with my earlier assume +attribute patch e.g. c-c++-common/Wno-attributes-6.c test failed because +it uses: +[[vendor::assume(1 + 1 == 2)]]; +with -Wno-attributes=vendor::assume and lookup_attribute ("assume", ) +finds such attribute and handled it that way. +So, for those cases, this patch introduces lookup_attribute and +remove_attribute overloads which specify also the namespace. +I think the fallthrough, hot, cold, likely, unlikely attribute handling +will need to use the new APIs too, so that we don't handle +[[msft::fallthrough]] attribute as something we'd know. + +2022-10-04 Jakub Jelinek <jakub@redhat.com> + + * attribs.h (remove_attribute): Declare overload with additional + attr_ns argument. + (private_lookup_attribute): Declare overload with additional + attr_ns and attr_ns_len arguments. + (lookup_attribute): New overload with additional attr_ns argument. + * attribs.cc (remove_attribute): New overload with additional + attr_ns argument. + (private_lookup_attribute): New overload with additional + attr_ns and attr_ns_len arguments. +--- + gcc/attribs.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ + gcc/attribs.h | 38 ++++++++++++++++++++++++++++++ + 2 files changed, 101 insertions(+) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index 656ea739e..8e2696bc5 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -1598,6 +1598,36 @@ remove_attribute (const char *attr_name, tree list) + return list; + } + ++/* Similarly but also match namespace on the removed attributes. */ ++ ++tree ++remove_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ tree *p; ++ gcc_checking_assert (attr_name[0] != '_'); ++ gcc_checking_assert (attr_ns == NULL || attr_ns[0] != '_'); ++ ++ for (p = &list; *p;) ++ { ++ tree l = *p; ++ ++ tree attr = get_attribute_name (l); ++ if (is_attribute_p (attr_name, attr)) ++ { ++ tree ns = get_attribute_namespace (l); ++ if ((ns == NULL_TREE && attr_ns == NULL) ++ || (ns && attr_ns && is_attribute_p (attr_ns, ns))) ++ { ++ *p = TREE_CHAIN (l); ++ continue; ++ } ++ } ++ p = &TREE_CHAIN (l); ++ } ++ ++ return list; ++} ++ + /* Return an attribute list that is the union of a1 and a2. */ + + tree +@@ -1995,6 +2025,39 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) + return list; + } + ++/* Similarly but with also attribute namespace. 
*/ ++ ++tree ++private_lookup_attribute (const char *attr_ns, const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, tree list) ++{ ++ while (list) ++ { ++ tree attr = get_attribute_name (list); ++ size_t ident_len = IDENTIFIER_LENGTH (attr); ++ if (cmp_attribs (attr_name, attr_len, IDENTIFIER_POINTER (attr), ++ ident_len)) ++ { ++ tree ns = get_attribute_namespace (list); ++ if (ns == NULL_TREE) ++ { ++ if (attr_ns == NULL) ++ break; ++ } ++ else if (attr_ns) ++ { ++ ident_len = IDENTIFIER_LENGTH (ns); ++ if (cmp_attribs (attr_ns, attr_ns_len, IDENTIFIER_POINTER (ns), ++ ident_len)) ++ break; ++ } ++ } ++ list = TREE_CHAIN (list); ++ } ++ ++ return list; ++} ++ + /* Return true if the function decl or type NODE has been declared + with attribute ANAME among attributes ATTRS. */ + +diff --git a/gcc/attribs.h b/gcc/attribs.h +index 0856f98fb..9ad530fcb 100644 +--- a/gcc/attribs.h ++++ b/gcc/attribs.h +@@ -88,6 +88,10 @@ extern tree merge_type_attributes (tree, tree); + + extern tree remove_attribute (const char *, tree); + ++/* Similarly but also with specific attribute namespace. */ ++ ++extern tree remove_attribute (const char *, const char *, tree); ++ + /* Given two attributes lists, return a list of their union. */ + + extern tree merge_attributes (tree, tree); +@@ -119,6 +123,10 @@ extern int attribute_list_contained (const_tree, const_tree); + for size. */ + extern tree private_lookup_attribute (const char *attr_name, size_t attr_len, + tree list); ++extern tree private_lookup_attribute (const char *attr_ns, ++ const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, ++ tree list); + + extern unsigned decls_mismatched_attributes (tree, tree, tree, + const char* const, +@@ -215,6 +223,36 @@ lookup_attribute (const char *attr_name, tree list) + } + } + ++/* Similar to lookup_attribute, but also match the attribute namespace. */ ++ ++static inline tree ++lookup_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ if (CHECKING_P && attr_name[0] != '_') ++ { ++ size_t attr_len = strlen (attr_name); ++ gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len)); ++ } ++ if (CHECKING_P && attr_ns && attr_ns[0] != '_') ++ { ++ size_t attr_ns_len = strlen (attr_ns); ++ gcc_checking_assert (!canonicalize_attr_name (attr_ns, attr_ns_len)); ++ } ++ /* In most cases, list is NULL_TREE. */ ++ if (list == NULL_TREE) ++ return NULL_TREE; ++ else ++ { ++ size_t attr_ns_len = attr_ns ? strlen (attr_ns) : 0; ++ size_t attr_len = strlen (attr_name); ++ /* Do the strlen() before calling the out-of-line implementation. ++ In most cases attr_name is a string constant, and the compiler ++ will optimize the strlen() away. */ ++ return private_lookup_attribute (attr_ns, attr_name, ++ attr_ns_len, attr_len, list); ++ } ++} ++ + /* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list first element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not +-- +2.33.0 +
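With the overloads in place, callers can match on both namespace and name. A minimal usage sketch (the attribute names are illustrative, not from the patch):

/* Matches assume only in the gnu namespace; an unrelated
   vendor::assume in ATTRS is no longer found.  */
tree a = lookup_attribute ("gnu", "assume", attrs);

/* Strips vendor::mode but keeps gnu::mode; per the code above, a
   NULL attr_ns would instead match only non-scoped attributes.  */
attrs = remove_attribute ("vendor", "mode", attrs);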
View file
_service:tar_scm:0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
Added
@@ -0,0 +1,47 @@ +From 044dc671f7eb723df5b6ce2364d6ae579c0cc984 Mon Sep 17 00:00:00 2001 +From: Martin Jambor <mjambor@suse.cz> +Date: Tue, 30 Aug 2022 18:50:35 +0200 +Subject: [PATCH 129/157] [Backport][SME] vec: Add array_slice constructors + from non-const and gc vectors + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=15433c214df295f2281a90fcf283355b21beca0e + +This patch adds constructors of array_slice that are required to +create them from non-const (heap or auto) vectors or from GC vectors. + +gcc/ChangeLog: + +2022-08-08 Martin Jambor <mjambor@suse.cz> + + * vec.h (array_slice): Add constructors for non-const reference to + heap vector and pointers to heap vectors. +--- + gcc/vec.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/gcc/vec.h b/gcc/vec.h +index 3ba7ea7ed..fc3b10c85 100644 +--- a/gcc/vec.h ++++ b/gcc/vec.h +@@ -2264,6 +2264,18 @@ public: + array_slice (const vec<OtherT> &v) + : m_base (v.address ()), m_size (v.length ()) {} + ++ template<typename OtherT> ++ array_slice (vec<OtherT> &v) ++ : m_base (v.address ()), m_size (v.length ()) {} ++ ++ template<typename OtherT> ++ array_slice (const vec<OtherT, va_gc> *v) ++ : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} ++ ++ template<typename OtherT> ++ array_slice (vec<OtherT, va_gc> *v) ++ : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} ++ + iterator begin () { return m_base; } + iterator end () { return m_base + m_size; } + +-- +2.33.0 +
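A minimal sketch of what the new constructors permit (process is a hypothetical consumer of array_slice, not part of the patch):

static void process (array_slice<tree> ops);  /* hypothetical */

static void
caller (vec<tree> &heap_vec, vec<tree, va_gc> *gc_vec)
{
  process (heap_vec);  /* non-const heap vector: new overload */
  process (gc_vec);    /* GC vector: new overload; a null pointer
                          becomes an empty slice */
}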
View file
_service:tar_scm:0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
Added
@@ -0,0 +1,140 @@ +From 12dd36f06e13ee9cd684c00732caa684f49b3610 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Jun 2023 08:48:17 +0100 +Subject: [PATCH 130/157] [Backport][SME] A couple of va_gc_atomic tweaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4e9f6c14280699997a633cefd3fb315b2bd4762c + +The only current user of va_gc_atomic is Ada's: + + vec<Entity_Id, va_gc_atomic> + +It uses the generic gt_pch_nx routines (with gt_pch_nx being the +“note pointers” hooks), such as: + + template<typename T, typename A> + void + gt_pch_nx (vec<T, A, vl_embed> *v) + { + extern void gt_pch_nx (T &); + for (unsigned i = 0; i < v->length (); i++) + gt_pch_nx ((*v)[i]); + } + +It then defines gt_pch_nx routines for Entity_Id &. + +The problem is that if we wanted to take the same approach for +an array of unsigned ints, we'd need to define: + + inline void gt_pch_nx (unsigned int &) { } + +which would then be ambiguous with: + + inline void gt_pch_nx (unsigned int) { } + +The point of va_gc_atomic is that the elements don't need to be GCed, +and so we have: + + template<typename T> + void + gt_ggc_mx (vec<T, va_gc_atomic, vl_embed> *v ATTRIBUTE_UNUSED) + { + /* Nothing to do. Vectors of atomic types wrt GC do not need to + be traversed. */ + } + +I think it's therefore reasonable to assume that no pointers will +need to be processed for PCH either. + +The patch also relaxes the array_slice constructor for vec<T, va_gc> * +so that it handles all embedded vectors. + +gcc/ + * vec.h (gt_pch_nx): Add overloads for va_gc_atomic. + (array_slice): Relax va_gc constructor to handle all vectors + with a vl_embed layout. + +gcc/ada/ + * gcc-interface/decl.cc (gt_pch_nx): Remove overloads for Entity_Id. +--- + gcc/ada/gcc-interface/decl.cc | 11 ----------- + gcc/vec.h | 22 ++++++++++++++++++---- + 2 files changed, 18 insertions(+), 15 deletions(-) + +diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc +index 1c7a71684..7193b55c7 100644 +--- a/gcc/ada/gcc-interface/decl.cc ++++ b/gcc/ada/gcc-interface/decl.cc +@@ -163,17 +163,6 @@ struct GTY((for_user)) tree_entity_vec_map + vec<Entity_Id, va_gc_atomic> *to; + }; + +-void +-gt_pch_nx (Entity_Id &) +-{ +-} +- +-void +-gt_pch_nx (Entity_Id *x, gt_pointer_operator op, void *cookie) +-{ +- op (x, NULL, cookie); +-} +- + struct dummy_type_hasher : ggc_cache_ptr_hash<tree_entity_vec_map> + { + static inline hashval_t +diff --git a/gcc/vec.h b/gcc/vec.h +index fc3b10c85..592d3f7e0 100644 +--- a/gcc/vec.h ++++ b/gcc/vec.h +@@ -1383,6 +1383,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v) + gt_pch_nx ((*v)[i]); + } + ++template<typename T> ++void ++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *) ++{ ++ /* No pointers to note. */ ++} ++ + template<typename T, typename A> + void + gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie) +@@ -1400,6 +1407,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie) + gt_pch_nx (&((*v)[i]), op, cookie); + } + ++template<typename T> ++void ++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *) ++{ ++ /* No pointers to note. */ ++} ++ + + /* Space efficient vector. These vectors can grow dynamically and are + allocated together with their control data. 
They are suited to be +@@ -2268,12 +2282,12 @@ public: + array_slice (vec<OtherT> &v) + : m_base (v.address ()), m_size (v.length ()) {} + +- template<typename OtherT> +- array_slice (const vec<OtherT, va_gc> *v) ++ template<typename OtherT, typename A> ++ array_slice (const vec<OtherT, A, vl_embed> *v) + : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} + +- template<typename OtherT> +- array_slice (vec<OtherT, va_gc> *v) ++ template<typename OtherT, typename A> ++ array_slice (vec<OtherT, A, vl_embed> *v) + : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} + + iterator begin () { return m_base; } +-- +2.33.0 +
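With the no-op gt_pch_nx overloads above, a GC-managed vector of a pointer-free element type needs no hand-written note routines. A minimal sketch (the structure is hypothetical, mirroring the unsigned-int case from the commit message):

struct GTY(()) id_list
{
  /* The elements contain no GC pointers, so both the GC marker and
     the PCH walker have nothing to do per element.  */
  vec<unsigned int, va_gc_atomic> *ids;
};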
View file
_service:tar_scm:0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
Added
@@ -0,0 +1,34 @@ +From bb15d4c4476e3ba303c5afe0adae0d86ab5f0a9b Mon Sep 17 00:00:00 2001 +From: zhongjuzhe <juzhe.zhong@rivai.ai> +Date: Mon, 22 Aug 2022 10:15:31 +0100 +Subject: [PATCH 131/157] [Backport][SME] middle-end: Fix issue of poly_uint16 + (1, 1) in self test + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=21e7d87a901d45f0cb5e5510d22bfbdb0d0ac6a1 + +This patch fix issue of poly_uint16 (1, 1) in machine mode self test. + +gcc/ChangeLog: + + * simplify-rtx.cc (test_vector_subregs_fore_back): Make first value + and repeat value different. +--- + gcc/simplify-rtx.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc +index e152918b0..fc79a2e2e 100644 +--- a/gcc/simplify-rtx.cc ++++ b/gcc/simplify-rtx.cc +@@ -8380,7 +8380,7 @@ test_vector_subregs_fore_back (machine_mode inner_mode) + for (unsigned int i = 0; i < count; ++i) + builder.quick_push (gen_int_mode (i, int_mode)); + for (unsigned int i = 0; i < count; ++i) +- builder.quick_push (gen_int_mode (-(int) i, int_mode)); ++ builder.quick_push (gen_int_mode (-1 - (int) i, int_mode)); + rtx x = builder.build (); + + test_vector_subregs_modes (x); +-- +2.33.0 +
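Concretely, taking count == 2, the values pushed into the builder are:

int fore[2]        = { 0, 1 };   /* first loop: i */
int back_before[2] = { 0, -1 };  /* old second loop: -(int) i */
int back_after[2]  = { -1, -2 }; /* new second loop: -1 - (int) i */

With the old values the "back" half started with the same value (0) as the "fore" half, which a variable-length (poly_uint16 (1, 1)) encoding can mistake for a repetition; the new values keep the two halves distinct.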
View file
_service:tar_scm:0231-SME-Add-missing-header-file-in-aarch64.cc.patch
Added
@@ -0,0 +1,24 @@ +From cce05b3365c3986ca74c04f442662a21b4f03a61 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Mon, 4 Mar 2024 14:39:36 +0800 +Subject: [PATCH 132/157] [SME] Add missing header file in `aarch64.cc` + +--- + gcc/config/aarch64/aarch64.cc | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index f7285555b..0117a3e12 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -86,6 +86,7 @@ + #include "tree-pass.h" + #include "cfgbuild.h" + #include "symbol-summary.h" ++#include "value-range.h" + #include "ipa-prop.h" + #include "ipa-fnsummary.h" + +-- +2.33.0 +
View file
_service:tar_scm:0232-Backport-SME-c-Add-support-for-__extension__.patch
Added
@@ -0,0 +1,327 @@ +From 3714cfb47fafef884aa2ff330935fb44b7966909 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 24 Aug 2023 11:49:58 +0100 +Subject: [PATCH 133/157] [Backport][SME] c: Add support for [[__extension__ + ...]] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=207a5daa9dcf31e367152163ad2a2ab4a0858967 + +[[]] attributes are a recent addition to C, but as a GNU extension, +GCC allows them to be used in C11 and earlier. Normally this use +would trigger a pedwarn (for -pedantic, -Wc11-c2x-compat, etc.). + +This patch allows the pedwarn to be suppressed by starting the +attribute-list with __extension__. + +Also, :: is not a single lexing token prior to C2X, so it wasn't +possible to use scoped attributes in C11, even as a GNU extension. +The patch allows two colons to be used in place of :: when +__extension__ is used. No attempt is made to check whether the +two colons are immediately adjacent. + +gcc/ + * doc/extend.texi: Document the C [[__extension__ ...]] construct. + +gcc/c/ + * c-parser.cc (c_parser_std_attribute): Conditionally allow + two colons to be used in place of ::. + (c_parser_std_attribute_list): New function, split out from... + (c_parser_std_attribute_specifier): ...here. Allow the attribute-list + to start with __extension__. When it does, also allow two colons + to be used in place of ::. + +gcc/testsuite/ + * gcc.dg/c2x-attr-syntax-6.c: New test. + * gcc.dg/c2x-attr-syntax-7.c: Likewise. +--- + gcc/c/c-parser.cc | 64 ++++++++++++++++++------ + gcc/doc/extend.texi | 27 ++++++++-- + gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 62 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 60 ++++++++++++++++++++++ + 4 files changed, 193 insertions(+), 20 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c + +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 78a313fe3..486f46e1c 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -4894,10 +4894,18 @@ c_parser_balanced_token_sequence (c_parser *parser) + ( balanced-token-sequence[opt] ) + + Keywords are accepted as identifiers for this purpose. +-*/ ++ ++ As an extension, we permit an attribute-specifier to be: ++ ++ [ [ __extension__ attribute-list ] ] ++ ++ Two colons are then accepted as a synonym for ::. No attempt is made ++ to check whether the colons are immediately adjacent. LOOSE_SCOPE_P ++ indicates whether this relaxation is in effect. 
*/ + + static tree +-c_parser_std_attribute (c_parser *parser, bool for_tm) ++c_parser_std_attribute (c_parser *parser, bool for_tm, ++ bool loose_scope_p = false) + { + c_token *token = c_parser_peek_token (parser); + tree ns, name, attribute; +@@ -4910,9 +4918,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) + } + name = canonicalize_attr_name (token->value); + c_parser_consume_token (parser); +- if (c_parser_next_token_is (parser, CPP_SCOPE)) ++ if (c_parser_next_token_is (parser, CPP_SCOPE) ++ || (loose_scope_p ++ && c_parser_next_token_is (parser, CPP_COLON) ++ && c_parser_peek_2nd_token (parser)->type == CPP_COLON)) + { + ns = name; ++ if (c_parser_next_token_is (parser, CPP_COLON)) ++ c_parser_consume_token (parser); + c_parser_consume_token (parser); + token = c_parser_peek_token (parser); + if (token->type != CPP_NAME && token->type != CPP_KEYWORD) +@@ -4981,19 +4994,9 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) + } + + static tree +-c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) ++c_parser_std_attribute_list (c_parser *parser, bool for_tm, ++ bool loose_scope_p = false) + { +- location_t loc = c_parser_peek_token (parser)->location; +- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) +- return NULL_TREE; +- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) +- { +- c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); +- return NULL_TREE; +- } +- if (!for_tm) +- pedwarn_c11 (loc, OPT_Wpedantic, +- "ISO C does not support %<[[]]%> attributes before C2X"); + tree attributes = NULL_TREE; + while (true) + { +@@ -5005,7 +5008,7 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) + c_parser_consume_token (parser); + continue; + } +- tree attribute = c_parser_std_attribute (parser, for_tm); ++ tree attribute = c_parser_std_attribute (parser, for_tm, loose_scope_p); + if (attribute != error_mark_node) + { + TREE_CHAIN (attribute) = attributes; +@@ -5014,6 +5017,35 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) + if (c_parser_next_token_is_not (parser, CPP_COMMA)) + break; + } ++ return attributes; ++} ++ ++static tree ++c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) ++{ ++ location_t loc = c_parser_peek_token (parser)->location; ++ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) ++ return NULL_TREE; ++ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) ++ { ++ c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); ++ return NULL_TREE; ++ } ++ tree attributes; ++ if (c_parser_next_token_is_keyword (parser, RID_EXTENSION)) ++ { ++ auto ext = disable_extension_diagnostics (); ++ c_parser_consume_token (parser); ++ attributes = c_parser_std_attribute_list (parser, for_tm, true); ++ restore_extension_diagnostics (ext); ++ } ++ else ++ { ++ if (!for_tm) ++ pedwarn_c11 (loc, OPT_Wpedantic, ++ "ISO C does not support %<[[]]%> attributes before C2X"); ++ attributes = c_parser_std_attribute_list (parser, for_tm); ++ } + c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); + c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); + return nreverse (attributes); +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 674db2f1a..3cfecee53 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11726,10 +11726,29 @@ macros to replace them with the customary keywords. 
It looks like this: + @findex __extension__ + @opindex pedantic + @option{-pedantic} and other options cause warnings for many GNU C extensions. +-You can +-prevent such warnings within one expression by writing +-@code{__extension__} before the expression. @code{__extension__} has no +-effect aside from this. ++You can suppress such warnings using the keyword @code{__extension__}. ++Specifically: ++ ++@itemize @bullet ++@item ++Writing @code{__extension__} before an expression prevents warnings ++about extensions within that expression. ++ ++@item ++In C, writing: ++ ++@smallexample ++[[__extension__ @dots{}]] ++@end smallexample ++ ++suppresses warnings about using @samp{[[]]} attributes in C versions ++that predate C2X@. Since the scope token @samp{::} is not a single ++lexing token in earlier versions of C, this construct also allows two colons ++to be used in place of @code{::}. GCC does not check whether the two ++colons are immediately adjacent. ++@end itemize ++ ++@code{__extension__} has no effect aside from this. + + @node Incomplete Enums + @section Incomplete @code{enum} Types +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +new file mode 100644 +index 000000000..9e5f65ce4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +@@ -0,0 +1,62 @@ ++/* Test C2x attribute syntax: use of __extension__ in C11 mode. */ ++/* { dg-do compile } */ ++/* { dg-options "-std=c11 -pedantic-errors" } */
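A minimal sketch of the accepted forms under -std=c11 -pedantic-errors (the vector_size attribute is just an illustration; the new tests exercise similar shapes):

/* Quietly accepted in C11 thanks to __extension__.  */
typedef int [[__extension__ gnu::vector_size (16)]] v16a;
/* Two separate colons allowed in place of ::, since :: is not a
   single lexing token before C2X.  */
typedef int [[__extension__ gnu : : vector_size (16)]] v16b;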
View file
_service:tar_scm:0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
Added
@@ -0,0 +1,140 @@ +From 29a71fc5cbfc3b5e4649abf51740daed5ea243bd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:20:55 +0000 +Subject: [PATCH 134/157] [Backport][SME] lra: Updates of biggest mode for hard + regs [PR112278] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6e2e0ce6795c863e295eb33559f8dc0500297da3 + +LRA keeps track of the biggest mode for both hard registers and +pseudos. The updates assume that the modes are ordered, i.e. that +we can tell whether one is no bigger than the other at compile time. + +That is (or at least seemed to be) a reasonable restriction for pseudos. +But it isn't necessarily so for hard registers, since the uses of hard +registers can be logically distinct. The testcase is an example of this. + +The biggest mode of hard registers is also special for other reasons. +As the existing comment says: + + /* A reg can have a biggest_mode of VOIDmode if it was only ever seen as + part of a multi-word register. In that case, just use the reg_rtx + mode. Do the same also if the biggest mode was larger than a register + or we can not compare the modes. Otherwise, limit the size to that of + the biggest access in the function or to the natural mode at least. */ + +This patch applies the same approach to the updates. + +gcc/ + PR rtl-optimization/112278 + * lra-int.h (lra_update_biggest_mode): New function. + * lra-coalesce.cc (merge_pseudos): Use it. + * lra-lives.cc (process_bb_lives): Likewise. + * lra.cc (new_insn_reg): Likewise. + +gcc/testsuite/ + PR rtl-optimization/112278 + * gcc.target/aarch64/sve/pr112278.c: New test. +--- + gcc/lra-coalesce.cc | 4 +--- + gcc/lra-int.h | 15 +++++++++++++++ + gcc/lra-lives.cc | 4 +--- + gcc/lra.cc | 5 ++--- + gcc/testsuite/gcc.target/aarch64/sve/pr112278.c | 15 +++++++++++++++ + 5 files changed, 34 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr112278.c + +diff --git a/gcc/lra-coalesce.cc b/gcc/lra-coalesce.cc +index c82934569..901a44663 100644 +--- a/gcc/lra-coalesce.cc ++++ b/gcc/lra-coalesce.cc +@@ -112,9 +112,7 @@ merge_pseudos (int regno1, int regno2) + = (lra_merge_live_ranges + (lra_reg_info[first].live_ranges, + lra_copy_live_range_list (lra_reg_info[first2].live_ranges))); +- if (partial_subreg_p (lra_reg_info[first].biggest_mode, +- lra_reg_info[first2].biggest_mode)) +- lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode; ++ lra_update_biggest_mode (first, lra_reg_info[first2].biggest_mode); + } + + /* Change pseudos in *LOC on their coalescing group +diff --git a/gcc/lra-int.h b/gcc/lra-int.h +index 04baefef3..040e87d11 100644 +--- a/gcc/lra-int.h ++++ b/gcc/lra-int.h +@@ -525,4 +525,19 @@ lra_assign_reg_val (int from, int to) + lra_reg_info[to].offset = lra_reg_info[from].offset; + } + ++/* Update REGNO's biggest recorded mode so that it includes a reference ++ in mode MODE. 
*/ ++inline void ++lra_update_biggest_mode (int regno, machine_mode mode) ++{ ++ if (!ordered_p (GET_MODE_SIZE (lra_reg_info[regno].biggest_mode), ++ GET_MODE_SIZE (mode))) ++ { ++ gcc_checking_assert (HARD_REGISTER_NUM_P (regno)); ++ lra_reg_info[regno].biggest_mode = reg_raw_mode[regno]; ++ } ++ else if (partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) ++ lra_reg_info[regno].biggest_mode = mode; ++} ++ + #endif /* GCC_LRA_INT_H */ +diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc +index a755464ee..fb4a12304 100644 +--- a/gcc/lra-lives.cc ++++ b/gcc/lra-lives.cc +@@ -770,9 +770,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + { + int regno = reg->regno; + +- if (partial_subreg_p (lra_reg_info[regno].biggest_mode, +- reg->biggest_mode)) +- lra_reg_info[regno].biggest_mode = reg->biggest_mode; ++ lra_update_biggest_mode (regno, reg->biggest_mode); + if (HARD_REGISTER_NUM_P (regno)) + lra_hard_reg_usage[regno] += freq; + } +diff --git a/gcc/lra.cc b/gcc/lra.cc +index 1444cb759..8fda432f1 100644 +--- a/gcc/lra.cc ++++ b/gcc/lra.cc +@@ -559,9 +559,8 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type, + lra_insn_reg *ir = lra_insn_reg_pool.allocate (); + ir->type = type; + ir->biggest_mode = mode; +- if (NONDEBUG_INSN_P (insn) +- && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) +- lra_reg_info[regno].biggest_mode = mode; ++ if (NONDEBUG_INSN_P (insn)) ++ lra_update_biggest_mode (regno, mode); + ir->subreg_p = subreg_p; + ir->early_clobber_alts = early_clobber_alts; + ir->regno = regno; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c +new file mode 100644 +index 000000000..4f56add2b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c +@@ -0,0 +1,15 @@ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++void ++f (void) ++{ ++ { ++ register svint8_t v0 asm ("z0"); ++ asm volatile ("" : "=w" (v0)); ++ } ++ { ++ register int8x8x4_t v0 asm ("v0"); ++ asm volatile ("" : "=w" (v0)); ++ } ++} +-- +2.33.0 +
View file
_service:tar_scm:0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
Added
@@ -0,0 +1,672 @@ +From 0a34bb6b18cdf34cb9d4f34b1697e1bcfcff139b Mon Sep 17 00:00:00 2001 +From: Joseph Myers <joseph@codesourcery.com> +Date: Thu, 25 Aug 2022 21:02:57 +0000 +Subject: [PATCH 135/157] [Backport][SME] c: Support C2x empty initializer + braces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=14cfa01755a66afbae2539f8b5796c960ddcecc6 + +ISO C2x standardizes empty initializer braces {}. Implement this +feature accordingly. The basic case was already supported and so just +needed diagnostic adjustments. However, the standard feature also +includes two cases that were not previously supported: empty +initializer braces for scalars, and empty initializer braces for +VLAs. Thus, add support for those features as well, updating existing +tests that expected them to be diagnosed. + +There was already some gimplifier support for converting +variable-sized initializations with empty CONSTRUCTORs to memset. +However, it didn't apply here; code earlier in gimplify_modify_expr +ended up calling gimplify_init_constructor via +gimplify_modify_expr_rhs, which ended up handling the CONSTRUCTOR in a +way that generated an ICE later. Add a check for this case earlier in +gimplify_modify_expr to avoid that issue. + +Bootstrapped with no regressions for x86_64-pc-linux-gnu. + +gcc/ + * gimplify.cc (gimplify_modify_expr): Convert initialization from + a variable-size CONSTRUCTOR to memset before call to + gimplify_modify_expr_rhs. + +gcc/c/ + * c-decl.cc (start_decl): Do not diagnose initialization of + variable-sized objects here. + * c-parser.cc (c_parser_braced_init): Add argument DECL. All + callers changed. + (c_parser_initializer): Diagnose initialization of variable-sized + objects other than with braced initializer. + (c_parser_braced_init): Use pedwarn_c11 for empty initializer + braces and update diagnostic text. Diagnose initialization of + variable-sized objects with nonempty braces. + * c-typeck.cc (digest_init): Update diagnostic for initialization + of variable-sized objects. + (really_start_incremental_init, set_designator) + (process_init_element): Update comments. + (pop_init_level): Allow scalar empty initializers. + +gcc/testsuite/ + * gcc.dg/c11-empty-init-1.c, gcc.dg/c11-empty-init-2.c, + gcc.dg/c11-empty-init-3.c, gcc.dg/c2x-empty-init-1.c, + gcc.dg/c2x-empty-init-2.c, gcc.dg/c2x-empty-init-3.c, + gcc.dg/gnu2x-empty-init-1.c, gcc.dg/gnu2x-empty-init-2.c: New + tests. + * gcc.dg/torture/dfp-default-init-1.c: Also test empty + initializers. + * gcc.dg/init-bad-1.c, gcc.dg/noncompile/pr71583.c, + gcc.dg/pr61096-1.c, gcc.dg/vla-init-2.c, gcc.dg/vla-init-3.c, + gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Update expected + diagnostics. + * gcc.dg/ubsan/c-shift-1.c: Use nonempty initializers for VLA + initializations expected to be diagnosed. 
+--- + gcc/c/c-decl.cc | 20 +----- + gcc/c/c-parser.cc | 24 +++++-- + gcc/c/c-typeck.cc | 23 ++++--- + gcc/gimplify.cc | 15 +++++ + gcc/testsuite/gcc.dg/c11-empty-init-1.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-2.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-1.c | 80 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-2.c | 18 +++++ + gcc/testsuite/gcc.dg/c2x-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c | 29 ++++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c | 16 +++++ + gcc/testsuite/gcc.dg/init-bad-1.c | 3 +- + gcc/testsuite/gcc.dg/noncompile/pr71583.c | 2 +- + gcc/testsuite/gcc.dg/pr61096-1.c | 2 +- + gcc/testsuite/gcc.dg/ubsan/c-shift-1.c | 12 ++-- + gcc/testsuite/gcc.dg/vla-init-2.c | 1 - + gcc/testsuite/gcc.dg/vla-init-3.c | 1 - + 18 files changed, 301 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c + +diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc +index 9d87a8cdb..685bb1757 100644 +--- a/gcc/c/c-decl.cc ++++ b/gcc/c/c-decl.cc +@@ -5166,29 +5166,15 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, + initialized = false; + else if (COMPLETE_TYPE_P (TREE_TYPE (decl))) + { +- /* A complete type is ok if size is fixed. */ +- +- if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) +- || C_DECL_VARIABLE_SIZE (decl)) +- { +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } ++ /* A complete type is ok if size is fixed. If the size is ++ variable, an empty initializer is OK and nonempty ++ initializers will be diagnosed in the parser. */ + } + else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) + { + error ("variable %qD has initializer but incomplete type", decl); + initialized = false; + } +- else if (C_DECL_VARIABLE_SIZE (decl)) +- { +- /* Although C99 is unclear about whether incomplete arrays +- of VLAs themselves count as VLAs, it does not make +- sense to permit them to be initialized given that +- ordinary VLAs may not be initialized. 
*/ +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } + } + + if (initialized) +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 486f46e1c..6db535d11 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -1515,7 +1515,7 @@ static tree c_parser_simple_asm_expr (c_parser *); + static tree c_parser_gnu_attributes (c_parser *); + static struct c_expr c_parser_initializer (c_parser *, tree); + static struct c_expr c_parser_braced_init (c_parser *, tree, bool, +- struct obstack *); ++ struct obstack *, tree); + static void c_parser_initelt (c_parser *, struct obstack *); + static void c_parser_initval (c_parser *, struct c_expr *, + struct obstack *); +@@ -5247,11 +5247,15 @@ static struct c_expr + c_parser_initializer (c_parser *parser, tree decl) + { + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE)) +- return c_parser_braced_init (parser, NULL_TREE, false, NULL); ++ return c_parser_braced_init (parser, NULL_TREE, false, NULL, decl); + else + { + struct c_expr ret; + location_t loc = c_parser_peek_token (parser)->location; ++ if (decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + ret = c_parser_expr_no_commas (parser, NULL); + /* This is handled mostly by gimplify.cc, but we have to deal with + not warning about int x = x; as it is a GCC extension to turn off +@@ -5278,11 +5282,12 @@ location_t last_init_list_comma; + compound literal, and NULL_TREE for other initializers and for + nested braced lists. NESTED_P is true for nested braced lists, + false for the list of a compound literal or the list that is the +- top-level initializer in a declaration. */ ++ top-level initializer in a declaration. DECL is the declaration for ++ the top-level initializer for a declaration, otherwise NULL_TREE. */ + + static struct c_expr + c_parser_braced_init (c_parser *parser, tree type, bool nested_p, +- struct obstack *outer_obstack) ++ struct obstack *outer_obstack, tree decl) + { + struct c_expr ret; + struct obstack braced_init_obstack; +@@ -5300,10 +5305,15 @@ c_parser_braced_init (c_parser *parser, tree type, bool nested_p, + really_start_incremental_init (type); + if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE)) + { +- pedwarn (brace_loc, OPT_Wpedantic, "ISO C forbids empty initializer braces"); ++ pedwarn_c11 (brace_loc, OPT_Wpedantic, ++ "ISO C forbids empty initializer braces before C2X"); + } + else + { ++ if (decl && decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (brace_loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + /* Parse a non-empty initializer list, possibly with a trailing + comma. */ + while (true) +@@ -5559,7 +5569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, + + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) + init = c_parser_braced_init (parser, NULL_TREE, true, +- braced_init_obstack); ++ braced_init_obstack, NULL_TREE); + else + { + init = c_parser_expr_no_commas (parser, after); +@@ -10312,7 +10322,7 @@ c_parser_postfix_expression_after_paren_type (c_parser *parser,
View file
_service:tar_scm:0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
Added
@@ -0,0 +1,115 @@ +From 67001778883e10110c505dd8876a447a19d1ac5e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 31 Aug 2022 15:39:27 +0100 +Subject: [PATCH 136/157] [Backport][SME] aarch64: Update sizeless tests for + recent GNU C changes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de9805c08121a84ce368dccfe043a3f44c3ff13b + +The tests for sizeless SVE types include checks that the types +are handled for initialisation purposes in the same way as scalars. +GNU C and C2x now allow scalars to be initialised using empty braces, +so this patch updates the SVE tests to match. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c: Update + tests for empty initializers. + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise. +--- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-2.c | 4 ++-- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +index 285751eeb..9db953583 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. */ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +index 306fd4780..c05b16406 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. 
*/ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +index 7fc51e7ad..4b34a71c1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +index c575492c1..34dfd598e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +-- +2.33.0 +
View file
_service:tar_scm:0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
Added
@@ -0,0 +1,58 @@ +From dbe5a29054d4eb1e0f5173c8f2291569eac71c96 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 2 Dec 2023 13:49:55 +0000 +Subject: [PATCH 137/157] [Backport][SME] attribs: Namespace-aware + lookup_attribute_spec + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df4643f90c45db2501c731d4fded60dc1426b484 + +attribute_ignored_p already used a namespace-aware query +to find the attribute_spec for an existing attribute: + + const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); + +This patch does the same for other callers in the file. + +gcc/ + * attribs.cc (comp_type_attributes): Pass the full TREE_PURPOSE + to lookup_attribute_spec, rather than just the name. + (remove_attributes_matching): Likewise. +--- + gcc/attribs.cc | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index 8e2696bc5..1dbc30a95 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -1417,7 +1417,7 @@ comp_type_attributes (const_tree type1, const_tree type2) + const struct attribute_spec *as; + const_tree attr; + +- as = lookup_attribute_spec (get_attribute_name (a)); ++ as = lookup_attribute_spec (TREE_PURPOSE (a)); + if (!as || as->affects_type_identity == false) + continue; + +@@ -1431,7 +1431,7 @@ comp_type_attributes (const_tree type1, const_tree type2) + { + const struct attribute_spec *as; + +- as = lookup_attribute_spec (get_attribute_name (a)); ++ as = lookup_attribute_spec (TREE_PURPOSE (a)); + if (!as || as->affects_type_identity == false) + continue; + +@@ -1473,8 +1473,7 @@ remove_attributes_matching (tree attrs, Predicate predicate) + const_tree start = attrs; + for (const_tree attr = attrs; attr; attr = TREE_CHAIN (attr)) + { +- tree name = get_attribute_name (attr); +- const attribute_spec *as = lookup_attribute_spec (name); ++ const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); + const_tree end; + if (!predicate (attr, as)) + end = attr; +-- +2.33.0 +
View file
_service:tar_scm:0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
Added
@@ -0,0 +1,281 @@ +From 6f42edc5035b7f7e96730dca19757b148e1be70c Mon Sep 17 00:00:00 2001 +From: Marek Polacek <polacek@redhat.com> +Date: Thu, 29 Sep 2022 17:49:32 -0400 +Subject: [PATCH 138/157] [Backport][SME] c-family: ICE with + [[gnu::nocf_check]] [PR106937] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67efffec943656a509e036cd3c785a5c3d6885e1 + +When getting the name of an attribute, we ought to use +get_attribute_name, which handles both [[]] and __attribute__(()) +forms. Failure to do so may result in an ICE, like here. + +pp_c_attributes_display wasn't able to print the [[]] form of +attributes, so this patch teaches it to. + +When printing a pointer to function with a standard attribute, the attribute +should be printed after the parameter-list. With this patch we print: + + aka 'void (*)(int) [[gnu::nocf_check]]' + +or, in C++ with noexcept: + + aka 'void (*)(int) noexcept [[gnu::nocf_check]]' + +pp_c_attributes has been unused since its introduction in r56273 so +this patch removes it. + + PR c++/106937 + +gcc/c-family/ChangeLog: + + * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU + attributes here. + (c_pretty_printer::direct_abstract_declarator): Print the standard + attributes here. + (pp_c_attributes): Remove. + (pp_c_attributes_display): Print the [[]] form if appropriate. Use + get_attribute_name. Don't print a trailing space when printing the + [[]] form. + * c-pretty-print.h (pp_c_attributes): Remove. + +gcc/cp/ChangeLog: + + * error.cc: Include "attribs.h". + (dump_type_prefix): Print only GNU attributes here. + (dump_type_suffix): Print standard attributes here. + +gcc/testsuite/ChangeLog: + + * c-c++-common/pointer-to-fn1.c: New test. +--- + gcc/c-family/c-pretty-print.cc | 96 ++++++++++++--------- + gcc/c-family/c-pretty-print.h | 1 - + gcc/cp/error.cc | 16 +++- + gcc/testsuite/c-c++-common/pointer-to-fn1.c | 18 ++++ + 4 files changed, 86 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/c-c++-common/pointer-to-fn1.c + +diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc +index 71a0cb510..4d60627b3 100644 +--- a/gcc/c-family/c-pretty-print.cc ++++ b/gcc/c-family/c-pretty-print.cc +@@ -462,7 +462,12 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) + { + pp_c_whitespace (pp); + pp_c_left_paren (pp); +- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); ++ /* If we're dealing with the GNU form of attributes, print this: ++ void (__attribute__((noreturn)) *f) (); ++ If it is the standard [[]] attribute, we'll print the attribute ++ in c_pretty_printer::direct_abstract_declarator/FUNCTION_TYPE. */ ++ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (pointee))) ++ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); + } + else if (!c_dialect_cxx ()) + pp_c_whitespace (pp); +@@ -591,6 +596,13 @@ c_pretty_printer::direct_abstract_declarator (tree t) + case FUNCTION_TYPE: + pp_c_parameter_type_list (this, t); + direct_abstract_declarator (TREE_TYPE (t)); ++ /* If this is the standard [[]] attribute, print ++ void (*)() [[noreturn]]; */ ++ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) ++ { ++ pp_space (this); ++ pp_c_attributes_display (this, TYPE_ATTRIBUTES (t)); ++ } + break; + + case ARRAY_TYPE: +@@ -845,32 +857,7 @@ c_pretty_printer::declaration (tree t) + pp_c_init_declarator (this, t); + } + +-/* Pretty-print ATTRIBUTES using GNU C extension syntax. 
*/ +- +-void +-pp_c_attributes (c_pretty_printer *pp, tree attributes) +-{ +- if (attributes == NULL_TREE) +- return; +- +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes)) +- { +- pp_tree_identifier (pp, TREE_PURPOSE (attributes)); +- if (TREE_VALUE (attributes)) +- pp_c_call_argument_list (pp, TREE_VALUE (attributes)); +- +- if (TREE_CHAIN (attributes)) +- pp_separate_with (pp, ','); +- } +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +-} +- +-/* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes +- marked to be displayed on disgnostic. */ ++/* Pretty-print ATTRIBUTES marked to be displayed on diagnostic. */ + + void + pp_c_attributes_display (c_pretty_printer *pp, tree a) +@@ -880,10 +867,12 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + if (a == NULL_TREE) + return; + ++ const bool std_p = cxx11_attribute_p (a); ++ + for (; a != NULL_TREE; a = TREE_CHAIN (a)) + { +- const struct attribute_spec *as; +- as = lookup_attribute_spec (TREE_PURPOSE (a)); ++ const struct attribute_spec *as ++ = lookup_attribute_spec (get_attribute_name (a)); + if (!as || as->affects_type_identity == false) + continue; + if (c_dialect_cxx () +@@ -891,26 +880,47 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + /* In C++ transaction_safe is printed at the end of the declarator. */ + continue; + if (is_first) +- { +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- is_first = false; +- } ++ { ++ if (std_p) ++ { ++ pp_c_left_bracket (pp); ++ pp_c_left_bracket (pp); ++ } ++ else ++ { ++ pp_c_ws_string (pp, "__attribute__"); ++ pp_c_left_paren (pp); ++ pp_c_left_paren (pp); ++ } ++ is_first = false; ++ } + else +- { +- pp_separate_with (pp, ','); +- } +- pp_tree_identifier (pp, TREE_PURPOSE (a)); ++ pp_separate_with (pp, ','); ++ tree ns; ++ if (std_p && (ns = get_attribute_namespace (a))) ++ { ++ pp_tree_identifier (pp, ns); ++ pp_colon (pp); ++ pp_colon (pp); ++ } ++ pp_tree_identifier (pp, get_attribute_name (a)); + if (TREE_VALUE (a)) +- pp_c_call_argument_list (pp, TREE_VALUE (a)); ++ pp_c_call_argument_list (pp, TREE_VALUE (a)); + } + + if (!is_first) + { +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +- pp_c_whitespace (pp); ++ if (std_p) ++ { ++ pp_c_right_bracket (pp); ++ pp_c_right_bracket (pp); ++ } ++ else ++ { ++ pp_c_right_paren (pp); ++ pp_c_right_paren (pp); ++ pp_c_whitespace (pp); ++ } + }
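The printed form can be seen with a sketch along the lines of the new test (the exact body of pointer-to-fn1.c is truncated above, so the details here are assumptions):

/* With -fcf-protection, assigning a plain function to this pointer
   type draws a diagnostic whose "aka" now prints the attribute after
   the parameter list: void (*)(int) [[gnu::nocf_check]].  */
typedef void (*fp) (int) [[gnu::nocf_check]];
void g (int);
fp p = g;  /* diagnostic mentions the attribute */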
_service:tar_scm:0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
Added
@@ -0,0 +1,35 @@
+From d13efe98cafa04aeb24f8e0f695e648887986228 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Wed, 7 Dec 2022 14:16:24 +0000
+Subject: [PATCH 139/157] [Backport][SME] AArch64: Fix assert in
+ aarch64_move_imm [PR108006]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=952c8a1dc6235dc49ab207a7f18f63d2bc97fbc9
+
+Ensure we only pass SI/DImode which fixes the assert.
+
+gcc/
+        PR target/108006
+        * config/aarch64/aarch64.cc (aarch64_expand_sve_const_vector):
+        Fix call to aarch64_move_imm to use SI/DI.
+---
+ gcc/config/aarch64/aarch64.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 0117a3e12..309ecc3d9 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -7925,7 +7925,8 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
+       /* If the integer can be moved into a general register by a
+          single instruction, do that and duplicate the result.  */
+       if (CONST_INT_P (elt_value)
+-          && aarch64_move_imm (INTVAL (elt_value), elt_mode))
++          && aarch64_move_imm (INTVAL (elt_value),
++                               encoded_bits <= 32 ? SImode : DImode))
+         {
+           elt_value = force_reg (elt_mode, elt_value);
+           return expand_vector_broadcast (mode, elt_value);
+--
+2.33.0
+
_service:tar_scm:0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
Added
@@ -0,0 +1,37 @@
+From 071f26ce18db5a09cbae0607b065028a09a856ac Mon Sep 17 00:00:00 2001
+From: Marek Polacek <polacek@redhat.com>
+Date: Tue, 11 Oct 2022 12:51:40 -0400
+Subject: [PATCH 140/157] [Backport][SME] testsuite: Only run -fcf-protection
+ test on i?86/x86_64 [PR107213]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cc694f45087c892e69ebbb177203c708f00b1bc7
+
+This test fails on non-i?86/x86_64 targets because on those targets
+we get
+
+  error: '-fcf-protection=full' is not supported for this target
+
+so this patch limits where the test is run.
+
+        PR testsuite/107213
+
+gcc/testsuite/ChangeLog:
+
+        * c-c++-common/pointer-to-fn1.c: Only run on i?86/x86_64.
+---
+ gcc/testsuite/c-c++-common/pointer-to-fn1.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c
+index 975885462..e2f948d82 100644
+--- a/gcc/testsuite/c-c++-common/pointer-to-fn1.c
++++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c
+@@ -1,4 +1,5 @@
+ /* PR c++/106937 */
++/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+ /* { dg-options "-fcf-protection" } */
+ /* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */
+ /* Test printing a pointer to function with attribute.  */
+--
+2.33.0
+
_service:tar_scm:0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
Added
@@ -0,0 +1,113 @@
+From 202ebc25e509ae0a2ac7d05c822cf6a8a817e49a Mon Sep 17 00:00:00 2001
+From: Andrew Pinski <apinski@marvell.com>
+Date: Thu, 17 Nov 2022 22:08:07 +0000
+Subject: [PATCH 141/157] [Backport][SME] Fix PRs 106764, 106765, and 107307,
+ all ICE after invalid re-declaration
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bd0c9d9e706adaeea0d96152daade0a6819a8715
+
+The problem here is the gimplifier returns GS_ERROR but
+in some cases we don't check that soon enough and try
+to do other work which could crash.
+So the fix in these two cases is to return GS_ERROR
+early if the gimplify_* functions had returned GS_ERROR.
+
+OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
+
+Thanks,
+Andrew Pinski
+
+gcc/ChangeLog:
+
+        PR c/106764
+        PR c/106765
+        PR c/107307
+        * gimplify.cc (gimplify_compound_lval): Return GS_ERROR
+        if gimplify_expr had returned GS_ERROR.
+        (gimplify_call_expr): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+        PR c/106764
+        PR c/106765
+        PR c/107307
+        * gcc.dg/redecl-19.c: New test.
+        * gcc.dg/redecl-20.c: New test.
+        * gcc.dg/redecl-21.c: New test.
+---
+ gcc/gimplify.cc                  | 5 +++++
+ gcc/testsuite/gcc.dg/redecl-19.c | 5 +++++
+ gcc/testsuite/gcc.dg/redecl-20.c | 9 +++++++++
+ gcc/testsuite/gcc.dg/redecl-21.c | 9 +++++++++
+ 4 files changed, 28 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-19.c
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-20.c
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-21.c
+
+diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
+index 91500e2fb..e9f527850 100644
+--- a/gcc/gimplify.cc
++++ b/gcc/gimplify.cc
+@@ -3272,6 +3272,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
+       tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
+                             fallback | fb_lvalue);
+       ret = MIN (ret, tret);
++      if (ret == GS_ERROR)
++        return GS_ERROR;
+
+       /* Step 2a: if we have component references we do not support on
+          registers then make sure the base isn't a register.  Of course
+@@ -3664,6 +3666,9 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value)
+     ret = gimplify_expr (&CALL_EXPR_FN (*expr_p), pre_p, NULL,
+                          is_gimple_call_addr, fb_rvalue);
+
++  if (ret == GS_ERROR)
++    return GS_ERROR;
++
+   nargs = call_expr_nargs (*expr_p);
+
+   /* Get argument types for verification.  */
+diff --git a/gcc/testsuite/gcc.dg/redecl-19.c b/gcc/testsuite/gcc.dg/redecl-19.c
+new file mode 100644
+index 000000000..cc1068544
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-19.c
+@@ -0,0 +1,5 @@
++/* We used to ICE in the gimplifier, PR 106764 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++(*a)(); // { dg-note "" }
++b(){a()} a; // { dg-error "" }
+diff --git a/gcc/testsuite/gcc.dg/redecl-20.c b/gcc/testsuite/gcc.dg/redecl-20.c
+new file mode 100644
+index 000000000..07f52115e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-20.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 107307 */
++// { dg-do compile }
++// { dg-options "-w" }
++void f ()
++{
++  const struct { int a1; } b; // { dg-note "" }
++  int *c = b.a;
++  int *b; // { dg-error "" }
++}
+diff --git a/gcc/testsuite/gcc.dg/redecl-21.c b/gcc/testsuite/gcc.dg/redecl-21.c
+new file mode 100644
+index 000000000..2f2a6548a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-21.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 106765 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++struct a {
++  int b
++} c() {
++  struct a a; // { dg-note "" }
++  a.b;
++  d a; // { dg-error "" }
+--
+2.33.0
+
_service:tar_scm:0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
Added
@@ -0,0 +1,43 @@
+From bc42a8bdab7b2ffeb81441c7c8a9a1215d8502ee Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 26 Jan 2023 15:51:00 +0000
+Subject: [PATCH 142/157] [Backport][SME] aarch64: Remove expected error for
+ compound literals
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=96fbe541481fcc7d1a8884fb8dbefd7979eb9543
+
+GCC no longer treats empty compound literals as an error
+(see 14cfa01755a66afbae2539f8b5796c960ddcecc6).
+
+gcc/testsuite/
+        * gcc.target/aarch64/bfloat16_scalar_typecheck.c: Accept empty
+        compound literals.
+---
+ gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
+index 7c9188cf2..f4ae68028 100644
+--- a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c
+@@ -40,7 +40,7 @@ bfloat16_t footest (bfloat16_t scalar0)
+   short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */
+   double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */
+
+-  bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */
++  bfloat16_t scalar2_1 = {};
+   bfloat16_t scalar2_2 = { glob_bfloat };
+   bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+   bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+@@ -92,7 +92,7 @@ bfloat16_t footest (bfloat16_t scalar0)
+
+   /* Compound literals.  */
+
+-  (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */
++  (bfloat16_t) {};
+   (bfloat16_t) { glob_bfloat };
+   (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+   (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+--
+2.33.0
+
_service:tar_scm:0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
Added
@@ -0,0 +1,264 @@ +From 42bfa9a26205da222cebbe830168b6f0b5e668b4 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Jul 2022 12:59:25 +0100 +Subject: PATCH 143/157 BackportSME aarch64: Remove redundant builtins + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e80daf04c8888f527d2fc7f6cbcd1b4c853dcd04 + +aarch64_builtin_vectorized_function handles some built-in functions +that already have equivalent internal functions. This seems to be +redundant now, since the target builtins that it chooses are mapped +to the same optab patterns as the internal functions. + +gcc/ + * config/aarch64/aarch64-builtins.cc + (aarch64_builtin_vectorized_function): Remove handling of + floor, ceil, trunc, round, nearbyint, sqrt, clz and ctz. + +gcc/testsuite/ + * gcc.target/aarch64/vect_unary_1.c: New test. +--- + gcc/config/aarch64/aarch64-builtins.cc | 32 --- + .../gcc.target/aarch64/vect_unary_1.c | 186 ++++++++++++++++++ + 2 files changed, 186 insertions(+), 32 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_1.c + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 37bb3af48..23a84cd53 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -2653,38 +2653,6 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, + switch (fn) + { + #undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) +- CASE_CFN_FLOOR: +- return AARCH64_FIND_FRINT_VARIANT (floor); +- CASE_CFN_CEIL: +- return AARCH64_FIND_FRINT_VARIANT (ceil); +- CASE_CFN_TRUNC: +- return AARCH64_FIND_FRINT_VARIANT (btrunc); +- CASE_CFN_ROUND: +- return AARCH64_FIND_FRINT_VARIANT (round); +- CASE_CFN_NEARBYINT: +- return AARCH64_FIND_FRINT_VARIANT (nearbyint); +- CASE_CFN_SQRT: +- return AARCH64_FIND_FRINT_VARIANT (sqrt); +-#undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##SImode && in_mode == V##C##N##Imode) +- CASE_CFN_CLZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_clzv4si; +- return NULL_TREE; +- } +- CASE_CFN_CTZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (2, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv2si; +- else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv4si; +- return NULL_TREE; +- } +-#undef AARCH64_CHECK_BUILTIN_MODE + #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) + CASE_CFN_IFLOOR: +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +new file mode 100644 +index 000000000..8516808be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +@@ -0,0 +1,186 @@ ++/* { dg-options "-O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++#define TEST2(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ ++test2_##OUT##_##NAME##_##IN (float dummy, \ ++ IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ ++{ \ ++ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ return x; \ ++} \ ++ ++#define TEST4(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(16))) \ ++test4_##OUT##_##NAME##_##IN (float dummy, \ ++ IN 
__attribute__((vector_size(16))) y) \ ++{ \ ++ OUT __attribute__((vector_size(16))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ x2 = __builtin_##NAME (y2); \ ++ x3 = __builtin_##NAME (y3); \ ++ return x; \ ++} \ ++ ++/* ++** test2_float_truncf_float: ++** frintz v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, truncf, float) ++ ++/* ++** test2_double_trunc_double: ++** frintz v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, trunc, double) ++ ++/* ++** test4_float_truncf_float: ++** frintz v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, truncf, float) ++ ++/* ++** test2_float_roundf_float: ++** frinta v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, roundf, float) ++ ++/* ++** test2_double_round_double: ++** frinta v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, round, double) ++ ++/* ++** test4_float_roundf_float: ++** frinta v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, roundf, float) ++ ++/* ++** test2_float_nearbyintf_float: ++** frinti v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, nearbyintf, float) ++ ++/* ++** test2_double_nearbyint_double: ++** frinti v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, nearbyint, double) ++ ++/* ++** test4_float_nearbyintf_float: ++** frinti v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, nearbyintf, float) ++ ++/* ++** test2_float_floorf_float: ++** frintm v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, floorf, float) ++ ++/* ++** test2_double_floor_double: ++** frintm v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, floor, double) ++ ++/* ++** test4_float_floorf_float: ++** frintm v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, floorf, float) ++ ++/* ++** test2_float_ceilf_float: ++** frintp v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, ceilf, float) ++ ++/* ++** test2_double_ceil_double: ++** frintp v0.2d, v1.2d ++** ret
_service:tar_scm:0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
Added
@@ -0,0 +1,63 @@
+From a1ba437195286af3389ba9f2d43b8cb6c73ba3d8 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Thu, 12 Oct 2023 15:55:58 +0100
+Subject: [PATCH 144/157] [Backport][SME] AArch64: Fix Armv9-a warnings that
+ get emitted whenever an ACLE header is used.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de593b3cffdc9c030c5e697ad9092b1b085dabc4
+
+At the moment, trying to use -march=armv9-a with any ACLE header such as
+arm_neon.h results in rows and rows of warnings saying:
+
+<built-in>: warning: "__ARM_ARCH" redefined
+<built-in>: note: this is the location of the previous definition
+
+This is obviously not useful and happens because the header was defined at
+__ARM_ARCH == 8 and the command line changes it.
+
+The Arm port solves this by undefining the macro during argument processing,
+and we do the same on AArch64 for the majority of macros.  However we define
+this macro using a different helper which requires the manual undef.
+
+Thanks,
+Tamar
+
+gcc/ChangeLog:
+
+        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add undef.
+
+gcc/testsuite/ChangeLog:
+
+        * gcc.target/aarch64/armv9_warning.c: New test.
+---
+ gcc/config/aarch64/aarch64-c.cc                  | 1 +
+ gcc/testsuite/gcc.target/aarch64/armv9_warning.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index 745719d8b..2d2ac42c4 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -129,6 +129,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
+ {
+   aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
+
++  cpp_undef (pfile, "__ARM_ARCH");
+   builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8);
+
+   builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
+diff --git a/gcc/testsuite/gcc.target/aarch64/armv9_warning.c b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+new file mode 100644
+index 000000000..35690d5bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv9-a -Wpedantic -Werror" } */
++
++#include <arm_neon.h>
++
+--
+2.33.0
+
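A quick way to see the effect (my own sketch, assuming an aarch64 compiler): before this change, the include below drowned in '"__ARM_ARCH" redefined' warnings under -march=armv9-a; with the undef in place the macro is defined exactly once, with the expected value:

  /* Compile with -march=armv9-a on aarch64.  */
  #include <arm_neon.h>

  #if __ARM_ARCH != 9
  #error "__ARM_ARCH should be 9 when targeting Armv9-A"
  #endif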
_service:tar_scm:0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
Added
@@ -0,0 +1,184 @@ +From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001 +From: Roger Sayle <roger@nextmovesoftware.com> +Date: Fri, 27 May 2022 08:57:46 +0100 +Subject: PATCH 145/157 BackportSME Canonicalize X&-Y as X*Y in match.pd + when Y is 0,1. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655 + +"For every pessimization, there's an equal and opposite optimization". + +In the review of my original patch for PR middle-end/98865, Richard +Biener pointed out that match.pd shouldn't be transforming X*Y into +X&-Y as the former is considered cheaper by tree-ssa's cost model +(operator count). A corollary of this is that we should instead be +transforming X&-Y into the cheaper X*Y as a preferred canonical form +(especially as RTL expansion now intelligently selects the appropriate +implementation based on the target's costs). + +With this patch we now generate identical code for: +int foo(int x, int y) { return -(x&1) & y; } +int bar(int x, int y) { return (x&1) * y; } + +specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2, +but both use and/mul with -Os. + +One minor wrinkle/improvement is that this patch includes three +additional optimizations (that account for the change in canonical +form) to continue to optimize PR92834 and PR94786. + +2022-05-27 Roger Sayle <roger@nextmovesoftware.com> + +gcc/ChangeLog + * match.pd (match_zero_one_valued_p): New predicate. + (mult @0 @1): Use zero_one_valued_p for optimization to the + expression "bit_and @0 @1". + (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR. + (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform. + (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise. + (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise. + Remove three redundant transforms obsoleted by the three above. + +gcc/testsuite/ChangeLog + * gcc.dg/pr98865.c: New test case. +--- + gcc/match.pd | 86 ++++++++++++++++------------------ + gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++ + 2 files changed, 55 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/pr98865.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index aee58e47b..6d3165bcd 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + || !COMPLEX_FLOAT_TYPE_P (type))) + (negate @0))) + +-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */ +-(simplify +- (mult SSA_NAME@1 SSA_NAME@2) +- (if (INTEGRAL_TYPE_P (type) +- && get_nonzero_bits (@1) == 1 +- && get_nonzero_bits (@2) == 1) +- (bit_and @1 @2))) +- + /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...}, + unless the target has native support for the former but not the latter. */ + (simplify +@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (bit_not (bit_not @0)) + @0) + ++(match zero_one_valued_p ++ @0 ++ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1))) ++(match zero_one_valued_p ++ truth_valued_p@0) ++ ++/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. */ ++(simplify ++ (mult zero_one_valued_p@0 zero_one_valued_p@1) ++ (if (INTEGRAL_TYPE_P (type)) ++ (bit_and @0 @1))) ++ ++/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ ++(simplify ++ (bit_and:c (convert? 
(negate zero_one_valued_p@0)) @1) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE ++ && !TYPE_UNSIGNED (TREE_TYPE (@0))) ++ (mult (convert @0) @1))) ++ + /* Convert ~ (-A) to A - 1. */ + (simplify + (bit_not (convert? (negate @0))) +@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (cmp @0 (minmax:c @0 @1)) + { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } )) + +-/* Undo fancy way of writing max/min or other ?: expressions, +- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a. ++/* Undo fancy ways of writing max/min or other ?: expressions, like ++ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a. + People normally use ?: and that is what we actually try to optimize. */ +-(for cmp (simple_comparison) +- (simplify +- (minus @0 (bit_and:c (minus @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- (simplify +- (plus:c @0 (bit_and:c (minus @1 @0) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- /* Similarly with ^ instead of - though in that case with :c. */ +- (simplify +- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0)))) ++/* Transform A + (B-A)*cmp into cmp ? B : A. */ ++(simplify ++ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A - (A-B)*cmp into cmp ? B : A. */ ++(simplify ++ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A ^ (A^B)*cmp into cmp ? B : A. */ ++(simplify ++ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) + + /* Simplifications of shift and rotates. */ + +diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c +new file mode 100644 +index 000000000..95f727033 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr98865.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++int foo(int x, int y) ++{ ++ return -(x&1) & y; ++} ++ ++int bar(int x, int y) ++{ ++ return (x&1) * y; ++} ++ ++/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */ +-- +2.33.0 +
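The three new transforms exist because the canonicalization changes which branchless max/min spellings reach the middle end; a sketch (function names are mine) of the PR92834-style idioms that should now all gimplify to the same conditional:

  int fancy_max_and (int a, int b) { return a - ((a - b) & -(a < b)); }
  int fancy_max_mul (int a, int b) { return a - (a - b) * (a < b); }
  int plain_max     (int a, int b) { return a < b ? b : a; }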
_service:tar_scm:0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
Added
@@ -0,0 +1,417 @@ +From a8f10b4b73c2624599765edf7ff19d53eca15135 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 12 Dec 2022 15:16:50 +0000 +Subject: PATCH 146/157 BackportSME middle-end: Add new tbranch optab to + add support for bit-test-and-branch operations + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc582d2ef32e2d3723c68d111f4e49607631f34d + +This adds a new test-and-branch optab that can be used to do a conditional test +of a bit and branch. This is similar to the cbranch optab but instead can +test any arbitrary bit inside the register. + +This patch recognizes boolean comparisons and single bit mask tests. + +gcc/ChangeLog: + + * dojump.cc (do_jump): Pass along value. + (do_jump_by_parts_greater_rtx): Likewise. + (do_jump_by_parts_zero_rtx): Likewise. + (do_jump_by_parts_equality_rtx): Likewise. + (do_compare_rtx_and_jump): Likewise. + (do_compare_and_jump): Likewise. + * dojump.h (do_compare_rtx_and_jump): New. + * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check. + (validate_test_and_branch): New. + (emit_cmp_and_jump_insns): Optiobally take a value, and when value is + supplied then check if it's suitable for tbranch. + * optabs.def (tbranch_eq$a4, tbranch_ne$a4): New. + * doc/md.texi (tbranch_@var{op}@var{mode}4): Document it. + * optabs.h (emit_cmp_and_jump_insns): New. + * tree.h (tree_zero_one_valued_p): New. +--- + gcc/doc/md.texi | 7 +++ + gcc/dojump.cc | 52 +++++++++++++++------- + gcc/dojump.h | 4 ++ + gcc/optabs.cc | 114 ++++++++++++++++++++++++++++++++++++++++++++---- + gcc/optabs.def | 2 + + gcc/optabs.h | 4 ++ + gcc/tree.h | 1 + + 7 files changed, 159 insertions(+), 25 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index c0cf0ec64..2193900e7 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -7299,6 +7299,13 @@ case, you can and should make operand 1's predicate reject some operators + in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether + from the machine description. + ++@cindex @code{tbranch_@var{op}@var{mode}3} instruction pattern ++@item @samp{tbranch_@var{op}@var{mode}3} ++Conditional branch instruction combined with a bit test-and-compare ++instruction. Operand 0 is the operand of the comparison. Operand 1 is the bit ++position of Operand 1 to test. Operand 3 is the @code{code_label} to jump to. ++@var{op} is one of @var{eq} or @var{ne}. ++ + @cindex @code{cbranch@var{mode}4} instruction pattern + @item @samp{cbranch@var{mode}4} + Conditional branch instruction combined with a compare instruction. +diff --git a/gcc/dojump.cc b/gcc/dojump.cc +index 0c880d653..604b28537 100644 +--- a/gcc/dojump.cc ++++ b/gcc/dojump.cc +@@ -621,7 +621,7 @@ do_jump (tree exp, rtx_code_label *if_false_label, + } + do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)), + NE, TYPE_UNSIGNED (TREE_TYPE (exp)), +- GET_MODE (temp), NULL_RTX, ++ exp, GET_MODE (temp), NULL_RTX, + if_false_label, if_true_label, prob); + } + +@@ -689,7 +689,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + + /* All but high-order word must be compared as unsigned. */ + do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0), +- word_mode, NULL_RTX, NULL, if_true_label, ++ NULL, word_mode, NULL_RTX, NULL, if_true_label, + prob); + + /* Emit only one comparison for 0. Do not emit the last cond jump. 
*/ +@@ -697,8 +697,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + break; + + /* Consider lower words only if these are equal. */ +- do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode, +- NULL_RTX, NULL, if_false_label, ++ do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL, ++ word_mode, NULL_RTX, NULL, if_false_label, + prob.invert ()); + } + +@@ -757,7 +757,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + if (part != 0) + { +- do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode, ++ do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode, + NULL_RTX, if_false_label, if_true_label, prob); + return; + } +@@ -768,7 +768,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- const0_rtx, EQ, 1, word_mode, NULL_RTX, ++ const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -811,8 +811,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- operand_subword_force (op1, i, mode), +- EQ, 0, word_mode, NULL_RTX, ++ operand_subword_force (op1, i, mode), ++ EQ, 0, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -964,6 +964,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + rtx_code_label *if_false_label, + rtx_code_label *if_true_label, + profile_probability prob) ++{ ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size, ++ if_false_label, if_true_label, prob); ++} ++ ++/* Like do_compare_and_jump but expects the values to compare as two rtx's. ++ The decision as to signed or unsigned comparison must be made by the caller. ++ ++ If MODE is BLKmode, SIZE is an RTX giving the size of the objects being ++ compared. 
*/ ++ ++void ++do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, ++ tree val, machine_mode mode, rtx size, ++ rtx_code_label *if_false_label, ++ rtx_code_label *if_true_label, ++ profile_probability prob) + { + rtx tem; + rtx_code_label *dummy_label = NULL; +@@ -1179,8 +1196,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + else + dest_label = if_false_label; +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, dest_label, NULL, first_prob); ++ ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, dest_label, NULL, ++ first_prob); + } + /* For !and_them we want to split: + if (x) goto t; // prob; +@@ -1194,8 +1213,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + else + { + profile_probability first_prob = prob.split (cprob); +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, NULL, if_true_label, first_prob); ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, NULL, ++ if_true_label, first_prob); + if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump)) + { + /* x != y can be split into x unord y || x ltgt y +@@ -1217,7 +1237,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + } + +- emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, ++ emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val, + if_true_label, prob); + } + +@@ -1291,9 +1311,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code, + op1 = new_op1; + } + +- do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, +- ((mode == BLKmode) +- ? expr_size (treeop0) : NULL_RTX), ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode, ++ ((mode == BLKmode) ++ ? expr_size (treeop0) : NULL_RTX), + if_false_label, if_true_label, prob); + } + +diff --git a/gcc/dojump.h b/gcc/dojump.h +index e379cceb3..d1d79c490 100644 +--- a/gcc/dojump.h ++++ b/gcc/dojump.h +@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label, + extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *, + profile_probability);
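What tbranch buys in practice (my own example; on a target that implements the new optab — AArch64's tbz/tbnz was the motivating case — the expected outcome is a single test-bit-and-branch instruction rather than an and/cmp/branch sequence):

  void g (void);

  void
  maybe_call (unsigned int x)
  {
    if (x & (1u << 5))  /* single-bit mask test, tbranch material */
      g ();
  }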
_service:tar_scm:0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
Added
@@ -0,0 +1,110 @@ +From fe64cc72e6221cf05e40bc868287bd1fcf07479f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 6 Nov 2023 10:49:58 +0000 +Subject: PATCH 147/157 BackportSME explow: Allow dynamic allocations + after vregs + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7a45364dddef399bfb550235166df51108a3142 + +This patch allows allocate_dynamic_stack_space to be called before +or after virtual registers have been instantiated. It uses the +same approach as allocate_stack_local, which already supported this. + +gcc/ + * function.h (get_stack_dynamic_offset): Declare. + * function.cc (get_stack_dynamic_offset): New function, + split out from... + (get_stack_dynamic_offset): ...here. + * explow.cc (allocate_dynamic_stack_space): Handle calls made + after virtual registers have been instantiated. +--- + gcc/explow.cc | 10 +++++++--- + gcc/function.cc | 12 +++++++++++- + gcc/function.h | 1 + + 3 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/gcc/explow.cc b/gcc/explow.cc +index d8aa75ee9..124e8f49e 100644 +--- a/gcc/explow.cc ++++ b/gcc/explow.cc +@@ -1375,12 +1375,16 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + HOST_WIDE_INT stack_usage_size = -1; + rtx_code_label *final_label; + rtx final_target, target; ++ rtx addr = (virtuals_instantiated ++ ? plus_constant (Pmode, stack_pointer_rtx, ++ get_stack_dynamic_offset ()) ++ : virtual_stack_dynamic_rtx); + + /* If we're asking for zero bytes, it doesn't matter what we point + to since we can't dereference it. But return a reasonable + address anyway. */ + if (size == const0_rtx) +- return virtual_stack_dynamic_rtx; ++ return addr; + + /* Otherwise, show we're calling alloca or equivalent. */ + cfun->calls_alloca = 1; +@@ -1532,7 +1536,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + poly_int64 saved_stack_pointer_delta; + + if (!STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + + /* Check stack bounds if necessary. */ + if (crtl->limit_stack) +@@ -1575,7 +1579,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + stack_pointer_delta = saved_stack_pointer_delta; + + if (STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + } + + suppress_reg_args_size = false; +diff --git a/gcc/function.cc b/gcc/function.cc +index f4fc211a0..e62b2a8d6 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -1945,6 +1945,16 @@ instantiate_decls (tree fndecl) + vec_free (cfun->local_decls); + } + ++/* Return the value of STACK_DYNAMIC_OFFSET for the current function. ++ This is done through a function wrapper so that the macro sees a ++ predictable set of included files. */ ++ ++poly_int64 ++get_stack_dynamic_offset () ++{ ++ return STACK_DYNAMIC_OFFSET (current_function_decl); ++} ++ + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +@@ -1956,7 +1966,7 @@ instantiate_virtual_regs (void) + /* Compute the offsets to use for this function. 
*/ + in_arg_offset = FIRST_PARM_OFFSET (current_function_decl); + var_offset = targetm.starting_frame_offset (); +- dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl); ++ dynamic_offset = get_stack_dynamic_offset (); + out_arg_offset = STACK_POINTER_OFFSET; + #ifdef FRAME_POINTER_CFA_OFFSET + cfa_offset = FRAME_POINTER_CFA_OFFSET (current_function_decl); +diff --git a/gcc/function.h b/gcc/function.h +index 4e8131706..a5846465a 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -711,6 +711,7 @@ extern vec<edge> convert_jumps_to_returns (basic_block last_bb, bool simple_p, + extern basic_block emit_return_for_exit (edge exit_fallthru_edge, + bool simple_p); + extern void reposition_prologue_and_epilogue_notes (void); ++extern poly_int64 get_stack_dynamic_offset (); + + /* Returns the name of the current function. */ + extern const char *fndecl_name (tree); +-- +2.33.0 +
_service:tar_scm:0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
Added
@@ -0,0 +1,225 @@ +From 6cc7bcadadbc2521a2db4c02adfe066d805e37ef Mon Sep 17 00:00:00 2001 +From: Giuliano Belinassi <gbelinassi@suse.de> +Date: Fri, 6 May 2022 23:37:52 -0300 +Subject: PATCH 148/157 BackportSME PR105169 Fix references to discarded + sections + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7a3f38a966a52893fb5bae301a1a3d56961358fb + +When -fpatchable-function-entry= is enabled, certain C++ codes fails to +link because of generated references to discarded sections in +__patchable_function_entry section. This commit fixes this problem by +puting those references in a COMDAT section. + +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + +gcc/ChangeLog + PR c++/105169 + * targhooks.cc (default_print_patchable_function_entry_1): Handle COMDAT case. + * varasm.cc (switch_to_comdat_section): New + (handle_vtv_comdat_section): Call switch_to_comdat_section. + * varasm.h: Declare switch_to_comdat_section. + +gcc/testsuite/ChangeLog +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + + PR c++/105169 + * g++.dg/modules/pr105169.h: New file. + * g++.dg/modules/pr105169_a.C: New test. + * g++.dg/modules/pr105169_b.C: New file. +--- + gcc/targhooks.cc | 8 ++++-- + gcc/testsuite/g++.dg/modules/pr105169.h | 22 +++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +++++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++++++++ + gcc/varasm.cc | 33 ++++++++++++++--------- + gcc/varasm.h | 2 ++ + 6 files changed, 87 insertions(+), 15 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C + +diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc +index c88afa5db..175a0e18a 100644 +--- a/gcc/targhooks.cc ++++ b/gcc/targhooks.cc +@@ -2019,8 +2019,12 @@ default_print_patchable_function_entry_1 (FILE *file, + patch_area_number++; + ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number); + +- switch_to_section (get_section ("__patchable_function_entries", +- flags, current_function_decl)); ++ section *sect = get_section ("__patchable_function_entries", ++ flags, current_function_decl); ++ if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl)) ++ switch_to_comdat_section (sect, current_function_decl); ++ else ++ switch_to_section (sect); + assemble_align (POINTER_SIZE); + fputs (asm_op, file); + assemble_name_raw (file, buf); +diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h b/gcc/testsuite/g++.dg/modules/pr105169.h +new file mode 100644 +index 000000000..a7e762705 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169.h +@@ -0,0 +1,22 @@ ++class IPXAddressClass ++{ ++public: ++ IPXAddressClass(void); ++}; ++ ++class WinsockInterfaceClass ++{ ++ ++public: ++ WinsockInterfaceClass(void); ++ ++ virtual void Set_Broadcast_Address(void*){}; ++ ++ virtual int Get_Protocol(void) ++ { ++ return 0; ++ }; ++ ++protected: ++}; ++ +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C b/gcc/testsuite/g++.dg/modules/pr105169_a.C +new file mode 100644 +index 000000000..66dc4b790 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C +@@ -0,0 +1,25 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. 
*/ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass* PacketTransport; ++ ++IPXAddressClass::IPXAddressClass(void) ++{ ++} ++ ++int function() ++{ ++ return PacketTransport->Get_Protocol(); ++} ++ ++int main() ++{ ++ IPXAddressClass ipxaddr; ++ return 0; ++} +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C b/gcc/testsuite/g++.dg/modules/pr105169_b.C +new file mode 100644 +index 000000000..5f8b00dfe +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C +@@ -0,0 +1,12 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. */ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass::WinsockInterfaceClass(void) ++{ ++} +diff --git a/gcc/varasm.cc b/gcc/varasm.cc +index 3f69b47a7..bae935694 100644 +--- a/gcc/varasm.cc ++++ b/gcc/varasm.cc +@@ -8459,25 +8459,21 @@ default_asm_output_ident_directive (const char *ident_str) + fprintf (asm_out_file, "%s\"%s\"\n", ident_asm_op, ident_str); + } + +- +-/* This function ensures that vtable_map variables are not only +- in the comdat section, but that each variable has its own unique +- comdat name. Without this the variables end up in the same section +- with a single comdat name. +- ++/* Switch to a COMDAT section with COMDAT name of decl. ++ + FIXME: resolve_unique_section needs to deal better with + decls with both DECL_SECTION_NAME and DECL_ONE_ONLY. Once + that is fixed, this if-else statement can be replaced with + a single call to "switch_to_section (sect)". */ + +-static void +-handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) ++void ++switch_to_comdat_section (section *sect, tree decl) + { + #if defined (OBJECT_FORMAT_ELF) + targetm.asm_out.named_section (sect->named.name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + #else + /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here. +@@ -8492,18 +8488,18 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + { + char *name; + +- if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE) ++ if (TREE_CODE (decl) == IDENTIFIER_NODE) + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_NAME (decl)), NULL)); ++ IDENTIFIER_POINTER (decl), NULL)); + else + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))), ++ IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)), + NULL)); + + targetm.asm_out.named_section (name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + } + else +@@ -8511,4 +8507,15 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + #endif + } +
_service:tar_scm:0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
Added
@@ -0,0 +1,53 @@
+From a3b4a0ac472415a52ce836e8997f7a69a06fad33 Mon Sep 17 00:00:00 2001
+From: Michael Collison <collison@rivosinc.com>
+Date: Sat, 6 May 2023 12:37:50 -0600
+Subject: [PATCH 149/157] [Backport][SME] RISC-V: autovec: Verify that
+ GET_MODE_NUNITS is a multiple of 2.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9
+
+While working on autovectorizing for the RISCV port I encountered an issue
+where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is
+evenly divisible by two.  The RISC-V target has vector modes (e.g. VNx1DImode),
+where GET_MODE_NUNITS is equal to one.
+
+Tested on RISCV and x86_64-linux-gnu. Okay?
+
+gcc/
+        * tree-vect-slp.cc (can_duplicate_and_interleave_p):
+        Check that GET_MODE_NUNITS is a multiple of 2.
+---
+ gcc/tree-vect-slp.cc | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index 6cbf8085f..d02f0ce37 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -401,10 +401,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+             (GET_MODE_BITSIZE (int_mode), 1);
+           tree vector_type
+             = get_vectype_for_scalar_type (vinfo, int_type, count);
++          poly_int64 half_nelts;
+           if (vector_type
+               && VECTOR_MODE_P (TYPE_MODE (vector_type))
+               && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+-                           GET_MODE_SIZE (base_vector_mode)))
++                           GET_MODE_SIZE (base_vector_mode))
++              && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
++                             2, &half_nelts))
+             {
+               /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+                  together into elements of type INT_TYPE and using the result
+@@ -412,7 +415,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+               poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
+               vec_perm_builder sel1 (nelts, 2, 3);
+               vec_perm_builder sel2 (nelts, 2, 3);
+-              poly_int64 half_nelts = exact_div (nelts, 2);
++
+               for (unsigned int i = 0; i < 3; ++i)
+                 {
+                   sel1.quick_push (i);
+--
+2.33.0
+
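The reasoning in miniature (a sketch mirroring the hunk above, using the same GCC-internal API): for a mode like VNx1DImode the unit count is 1, so the old exact_div (nelts, 2) would assert, whereas multiple_p merely tests divisibility and supplies the quotient on success:

  poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
  poly_int64 half_nelts;
  if (multiple_p (nelts, 2, &half_nelts))
    {
      /* nelts is even; half_nelts now holds nelts / 2.  */
    }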
_service:tar_scm:0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
Added
@@ -0,0 +1,42 @@
+From b0ca9a6eb1406a60eec566cf302790bee89879af Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Tue, 18 Apr 2023 16:58:26 +0200
+Subject: [PATCH 150/157] [Backport][SME] Add operator* to gimple_stmt_iterator
+ and gphi_iterator
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44
+
+This allows STL style iterator dereference.  It's the same
+as gsi_stmt () or .phi ().
+
+        * gimple-iterator.h (gimple_stmt_iterator::operator*): Add.
+        (gphi_iterator::operator*): Likewise.
+---
+ gcc/gimple-iterator.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
+index 216ebee24..5d281e4f7 100644
+--- a/gcc/gimple-iterator.h
++++ b/gcc/gimple-iterator.h
+@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
+
+ struct gimple_stmt_iterator
+ {
++  gimple *operator * () const { return ptr; }
++
+   /* Sequence node holding the current statement.  */
+   gimple_seq_node ptr;
+
+@@ -38,6 +40,8 @@ struct gimple_stmt_iterator
+ /* Iterator over GIMPLE_PHI statements.  */
+ struct gphi_iterator : public gimple_stmt_iterator
+ {
++  gphi *operator * () const { return as_a <gphi *> (ptr); }
++
+   gphi *phi () const
+   {
+     return as_a <gphi *> (ptr);
+--
+2.33.0
+
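A usage sketch (mine, assuming the usual gimple-iterator.h and gimple-pretty-print.h includes): the new operator lets iterator loops dereference directly instead of spelling out gsi_stmt:

  static void
  dump_block_stmts (basic_block bb)
  {
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
         !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = *gsi;  /* same statement as gsi_stmt (gsi) */
        print_gimple_stmt (stderr, stmt, 0, TDF_NONE);
      }
  }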
_service:tar_scm:0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
Added
@@ -0,0 +1,75 @@
+From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 10 Nov 2023 12:39:11 +0100
+Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and
+ loop mask/len
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
+
+The following fixes the issue that when SLP stmts are internal defs
+but appear invariant because they end up only using invariant defs
+then they get scheduled outside of the loop.  This nice optimization
+breaks down when loop masks or lens are applied since those are not
+explicitly tracked as dependences.  The following makes sure to never
+schedule internal defs outside of the vectorized loop when the
+loop uses masks/lens.
+
+        PR tree-optimization/110221
+        * tree-vect-slp.cc (vect_schedule_slp_node): When loop
+        masking / len is applied make sure to not schedule
+        internal defs outside of the loop.
+
+        * gfortran.dg/pr110221.f: New testcase.
+---
+ gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
+ gcc/tree-vect-slp.cc                 | 10 ++++++++++
+ 2 files changed, 27 insertions(+)
+ create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
+
+diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
+new file mode 100644
+index 000000000..8b5738431
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/pr110221.f
+@@ -0,0 +1,17 @@
++C PR middle-end/68146
++C { dg-do compile }
++C { dg-options "-O2 -w" }
++C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
++      SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
++      IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
++      IMPLICIT COMPLEX*16 (C,Z)
++      DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
++      N=INT(V)
++      CALL GAMMA2(VG,GA)
++      DO 65 K=1,N
++         CBY(K)=CYY
++65    CONTINUE
++      CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
++      DO 70 K=1,N
++70    CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
++      END
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d02f0ce37..e3e246977 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo,
+   /* Emit other stmts after the children vectorized defs which is
+      earliest possible.  */
+   gimple *last_stmt = NULL;
++  if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
++    if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
++        || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
++      {
++        /* But avoid scheduling internal defs outside of the loop when
++           we might have only implicitly tracked loop mask/len defs.  */
++        gimple_stmt_iterator si
++          = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
++        last_stmt = *si;
++      }
+   bool seen_vector_def = false;
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+--
+2.33.0
+
_service:tar_scm:0251-SME-Adapt-some-testsuites.patch
Added
@@ -0,0 +1,116 @@ +From b60c29e6658c8620f1116ce5a38a6eb823af64e6 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Thu, 7 Mar 2024 10:22:39 +0800 +Subject: PATCH 152/157 SME Adapt some testsuites + +gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: + GCC 12.3.0 do not support -std=c23 and -std=gnu23 + +gcc.target/aarch64/sme/streaming_mode_2.c: + It's a warning in GCC 12.3.0 + +gcc.dg/c2x-attr-syntax-6.c: +gcc.dg/c2x-attr-syntax-7.c: + GCC 12.3.0 do not support C2x (...) function prototypes and + C2x noreturn attribute + +gcc.target/aarch64/sme/za_state_4.c: + Seems need a ldp/stp optimization, not a functionality issue +--- + gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 2 -- + gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 2 -- + .../gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp | 2 -- + .../gcc.target/aarch64/sme/streaming_mode_2.c | 12 ++++++------ + gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c | 1 + + 5 files changed, 7 insertions(+), 12 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +index 9e5f65ce4..2385b25fe 100644 +--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c +@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5; + typedef int __extension__ gnu BAR BAR vector_size (4) g6; + typedef int __extension__ gnu :/**/: vector_size (4) g7; + typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8; +-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10; + typedef int __extension__ g11; + typedef int __extension__, g12; + typedef int __extension__, ,,,, ,, , g13; + __extension__ deprecated int g14 (); + __extension__ nodiscard int g15 (); +-__extension__ noreturn void g16 (); + + int + cases (int x) +diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c +index 702f733b1..5bbdba665 100644 +--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c ++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c +@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5; + typedef int __extension__ gnu BAR BAR vector_size (4) g6; + typedef int __extension__ gnu :/**/: vector_size (4) g7; + typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8; +-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10; + typedef int __extension__ g11; + typedef int __extension__, g12; + typedef int __extension__, ,,,, ,, , g13; + __extension__ deprecated int g14 (); + __extension__ nodiscard int g15 (); +-__extension__ noreturn void g16 (); + + int + cases (int x) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +index e2d002f26..a0a4fe4f7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp ++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp +@@ -52,9 +52,7 @@ set-torture-options { + "-std=c90 -O0 -g" + "-std=c99 -Og -g" + "-std=c11 -Os -g" +- "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" + "-std=gnu90 -O3 -g" +- "-std=gnu23 -Ofast -g" + } { + "-DTEST_FULL" + "-DTEST_OVERLOADS" +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c +index e8be0f821..1e328c817 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c +@@ -12,14 +12,14 @@ void + f () + { + sc_fn_ptr = sc_fn; 
+- sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } +- sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } ++ sc_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } ++ sc_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } + +- s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } ++ s_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } + s_fn_ptr = s_fn; +- s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } ++ s_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } + +- ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } +- ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } ++ ns_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } ++ ns_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } + ns_fn_ptr = ns_fn; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +index cec0abf0e..a764a7c89 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +@@ -105,6 +105,7 @@ __arm_new("za") void test5() + ** mul (x0-9+), \1, \1 + ** sub sp, sp, \2 + ** mov (x0-9+), sp ++** add ^\n+ + ** stp \3, \1, \x29, #?16\ + ** add (x0-9+), x29, #?16 + ** msr tpidr2_el0, \4 +-- +2.33.0 +
_service:tar_scm:0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch
Added
@@ -0,0 +1,43 @@
+From ce53aec1f43f79c093db662a2e8e3062462757b4 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Thu, 22 Aug 2024 16:35:28 +0800
+Subject: [PATCH 153/157] [SME] Fix error by backported patches and IPA
+ prefetch
+
+Fix
+gtype-desc.cc: In function 'void gt_pch_p_30vec_cgraph_node__va_gc_atomic_(void*, void*, gt_pointer_operator, void*)':
+gtype-desc.cc:11032:35: error: call of overloaded 'gt_pch_nx(vec<cgraph_node*, va_gc_atomic>*, void (*&)(void*, void*, void*), void*&)' is ambiguous
+11032 |       gt_pch_nx (&((*x)), op, cookie);
+      |                                   ^
+In file included from ../../gcc/hash-table.h:248,
+                 from ../../gcc/coretypes.h:486,
+                 from gtype-desc.cc:23:
+../../gcc/vec.h:1395:1: note: candidate: 'void gt_pch_nx(vec<T*, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1395 | gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1403:1: note: candidate: 'void gt_pch_nx(vec<T, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1403 | gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1412:1: note: candidate: 'void gt_pch_nx(vec<T, va_gc_atomic, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1412 | gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *)
+      | ^~~~~~~~~
+---
+ gcc/cgraph.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/cgraph.h b/gcc/cgraph.h
+index b84ff2f98..2332539e5 100644
+--- a/gcc/cgraph.h
++++ b/gcc/cgraph.h
+@@ -1660,7 +1660,7 @@ public:
+   /* ECF flags determined from the caller.  */
+   int ecf_flags;
+   /* Vector of potential call targets determined by analysis.  */
+-  vec<cgraph_node *, va_gc_atomic> *targets;
++  vec<cgraph_node *, va_gc> *targets;
+
+   /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES.  */
+   unsigned num_speculative_call_targets : 16;
+--
+2.33.0
+
_service:tar_scm:0253-aarch64-Fix-return-register-handling-in-untyped_call.patch
Added
@@ -0,0 +1,66 @@
+From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Wed, 20 Sep 2023 11:13:19 +0100
+Subject: [PATCH] aarch64: Fix return register handling in untyped_call
+
+While working on another patch, I hit a problem with the aarch64
+expansion of untyped_call.  The expander emits the usual:
+
+  (set (mem ...) (reg resN))
+
+instructions to store the result registers to memory, but it didn't
+say in RTL where those resN results came from.  This eventually led
+to a failure of gcc.dg/torture/stackalign/builtin-return-2.c,
+via regrename.
+
+This patch turns the untyped call from a plain call to a call_value,
+to represent that the call returns (or might return) a useful value.
+The patch also uses a PARALLEL return rtx to represent all the possible
+return registers.
+
+gcc/
+        * config/aarch64/aarch64.md (untyped_call): Emit a call_value
+        rather than a call.  List each possible destination register
+        in the call pattern.
+
+(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f)
+---
+ gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index b9b3ba4f9164..cd568f3daa0f 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1159,9 +1159,27 @@
+ {
+   int i;
+
++  /* Generate a PARALLEL that contains all of the register results.
++     The offsets are somewhat arbitrary, since we don't know the
++     actual return type.  The main thing we need to avoid is having
++     overlapping byte ranges, since those might give the impression
++     that two registers are known to have data in common.  */
++  rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0));
++  poly_int64 offset = 0;
++  for (i = 0; i < XVECLEN (operands[2], 0); i++)
++    {
++      rtx reg = SET_SRC (XVECEXP (operands[2], 0, i));
++      gcc_assert (REG_P (reg));
++      rtx offset_rtx = gen_int_mode (offset, Pmode);
++      rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx);
++      RTVEC_ELT (rets, i) = piece;
++      offset += GET_MODE_SIZE (GET_MODE (reg));
++    }
++  rtx ret = gen_rtx_PARALLEL (VOIDmode, rets);
++
+   /* Untyped calls always use the default ABI.  It's only possible to use
+      ABI variants if we know the type of the target function.  */
+-  emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
++  emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx));
+
+   for (i = 0; i < XVECLEN (operands[2], 0); i++)
+     {
+--
+2.43.5
+
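untyped_call is what __builtin_apply expands to, so a forwarder along these lines (my sketch, modeled loosely on the builtin-return torture tests; the 64-byte argument-block size is an ample guess) exercises the path whose result registers this patch makes visible in RTL:

  double target_fn (int a, double b) { return a + b; }

  double
  forwarder (int a, double b)
  {
    void *args = __builtin_apply_args ();
    void *ret = __builtin_apply ((void (*) ()) target_fn, args, 64);
    __builtin_return (ret);
  }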
View file
_service:tar_scm:0254-aarch64-Fix-loose-ldpstp-check.patch
Added
@@ -0,0 +1,119 @@ +From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 20 Sep 2023 11:13:20 +0100 +Subject: PATCH aarch64: Fix loose ldpstp check PR111411 + +aarch64_operands_ok_for_ldpstp contained the code: + + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. */ + if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) + && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) + return false; + +But the requirement isn't just that one of the accesses must be a +valid mempair operand. It's that the lower access must be, since +that's the access that will be used for the instruction operand. + +gcc/ + PR target/111411 + * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require + the lower memory access to a mem-pair operand. + +gcc/testsuite/ + PR target/111411 + * gcc.dg/rtl/aarch64/pr111411.c: New test. + +(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) +--- + gcc/config/aarch64/aarch64.cc | 8 ++- + gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ + 2 files changed, 60 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 96c3f48fdc49..a979accd90a9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, + gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), + GET_MODE_SIZE (GET_MODE (mem_2)))); + +- /* One of the memory accesses must be a mempair operand. +- If it is not the first one, they need to be swapped by the +- peephole. */ +- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) +- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) ++ /* The lower memory access must be a mem-pair operand. */ ++ rtx lower_mem = reversed ? 
mem_2 : mem_1; ++ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) + return false; + + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +new file mode 100644 +index 000000000000..ad07e9c6c893 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +@@ -0,0 +1,57 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ ++ ++extern int data; ++ ++void __RTL (startwith ("ira")) foo (void *ptr) ++{ ++ (function "foo" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} ++ ++void __RTL (startwith ("ira")) bar (void *ptr) ++{ ++ (function "bar" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} +-- +2.43.5 +
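In C terms, the peephole fires on adjacent same-size accesses like the pair below, a hypothetical analogue of the RTL test's &data+508 and &data+512 stores. The fix ensures that the lower-addressed access, whichever operand order the peephole happens to see, is the one checked against stp's addressing range:

/* Illustrative C analogue (not the testcase itself): two adjacent
   32-bit stores that the stp peephole may fuse.  p + 508 is the lower
   address, so it is the one that must be a valid mem-pair operand.  */
void zero_two_words (char *p)
{
  *(int *) (p + 508) = 0;
  *(int *) (p + 512) = 0;
}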
View file
_service:tar_scm:0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
Added
@@ -0,0 +1,135 @@ +From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Wed, 10 May 2023 15:16:58 +0800 +Subject: PATCH 01/28 x86: Add a new option -mdaz-ftz to enable FTZ and DAZ + flags in MXCSR. + + if (mdaz-ftz) + link crtfastmath.o + else if ((Ofast || ffast-math || funsafe-math-optimizations) + && !mno-daz-ftz) + link crtfastmath.o + else + Don't link crtfastmath.o + +gcc/ChangeLog: + + * config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o + whenever -mdaz-ftz is specified. Don't link crtfastmath.o + when -mno-daz-ftz is specified. + * config/i386/darwin.h (ENDFILE_SPEC): Ditto. + * config/i386/gnu-user-common.h + (GNU_USER_TARGET_MATHFILE_SPEC): Ditto. + * config/i386/mingw32.h (ENDFILE_SPEC): Ditto. + * config/i386/i386.opt (mdaz-ftz): New option. + * doc/invoke.texi (x86 options): Document mftz-daz. +--- + gcc/config/i386/cygwin.h | 2 +- + gcc/config/i386/darwin.h | 4 ++-- + gcc/config/i386/gnu-user-common.h | 2 +- + gcc/config/i386/i386.opt | 4 ++++ + gcc/config/i386/mingw32.h | 2 +- + gcc/doc/invoke.texi | 11 ++++++++++- + 6 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h +index d06eda369..5412c5d44 100644 +--- a/gcc/config/i386/cygwin.h ++++ b/gcc/config/i386/cygwin.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h +index a55f6b2b8..2f773924d 100644 +--- a/gcc/config/i386/darwin.h ++++ b/gcc/config/i386/darwin.h +@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3. If not see + "%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} " + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++#define ENDFILE_SPEC ++\ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" TM_DESTRUCTOR +diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h +index 23b54c5be..3d2a33f17 100644 +--- a/gcc/config/i386/gnu-user-common.h ++++ b/gcc/config/i386/gnu-user-common.h +@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see + + /* Similar to standard GNU userspace, but adding -ffast-math support. */ + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index fc1b944ac..498fb454d 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -420,6 +420,10 @@ mpc80 + Target RejectNegative + Set 80387 floating-point precision to 80-bit. + ++mdaz-ftz ++Target ++Set the FTZ and DAZ Flags. ++ + mpreferred-stack-boundary= + Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg) + Attempt to keep stack aligned to this power of 2. 
+diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h +index d3ca0cd02..ddbe6a405 100644 +--- a/gcc/config/i386/mingw32.h ++++ b/gcc/config/i386/mingw32.h +@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2b376e0e9..3a48655e5 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options. + -m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol + -mregparm=@var{num} -msseregparm @gol + -mveclibabi=@var{type} -mvect8-ret-in-mem @gol +--mpc32 -mpc64 -mpc80 -mstackrealign @gol ++-mpc32 -mpc64 -mpc80 -mdaz-ftz -mstackrealign @gol + -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol + -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol + -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol +@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant + loss of accuracy, typically through so-called ``catastrophic cancellation'', + when this option is used to set the precision to less than extended precision. + ++@item -mdaz-ftz ++@opindex mdaz-ftz ++ ++The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register ++are used to control floating-point calculations.SSE and AVX instructions ++including scalar and vector instructions could benefit from enabling the FTZ ++and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags ++when @option{-mno-daz-ftz} is specified. ++ + @item -mstackrealign + @opindex mstackrealign + Realign the stack at entry. On the x86, the @option{-mstackrealign} +-- +2.31.1 +
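One way to observe the new option (a sketch, not part of the patch): crtfastmath.o sets FTZ (MXCSR bit 15) and DAZ (bit 6) before main runs, so a binary built with -mdaz-ftz but without any fast-math option should report both bits set.

/* Build: gcc -O2 -mdaz-ftz check-mxcsr.c   (file name hypothetical)
   With -mdaz-ftz, crtfastmath.o is linked and both bits read as 1;
   with -Ofast -mno-daz-ftz it is not linked and both read as 0.  */
#include <xmmintrin.h>
#include <stdio.h>

int main (void)
{
  unsigned int csr = _mm_getcsr ();
  printf ("FTZ=%u DAZ=%u\n", (csr >> 15) & 1, (csr >> 6) & 1);
  return 0;
}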
View file
_service:tar_scm:0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
Added
@@ -0,0 +1,65 @@ +From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 5 Jun 2023 12:38:41 +0800 +Subject: PATCH 02/28 Explicitly view_convert_expr mask to signed type when + folding pblendvb builtins. + +Since mask < 0 will be always false for vector char when +-funsigned-char, but vpblendvb needs to check the most significant +bit. The patch explicitly VCE to vector signed char. + +gcc/ChangeLog: + + PR target/110108 + * config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly + view_convert_expr mask to signed type when folding pblendvb + builtins. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110108-2.c: New test. +--- + gcc/config/i386/i386.cc | 4 +++- + gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 462dce10e..479fc6010 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) + tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode + ? intSI_type_node : intDI_type_node; + type = get_same_sized_vectype (itype, type); +- arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + } ++ else ++ type = signed_type_for (type); ++ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + tree zero_vec = build_zero_cst (type); + tree cmp_type = truth_type_for (type); + tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec); +diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c +new file mode 100644 +index 000000000..2d1d2fd49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx2 -O2 -funsigned-char" } */ ++/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ ++ ++#include <immintrin.h> ++__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) { ++ __m128i Result = _mm_blendv_epi8(X0, X1, X2); ++ return Result; ++} ++ ++__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) { ++ __m256i Result = _mm256_blendv_epi8(X0, X1, X2); ++ return Result; ++} +-- +2.31.1 +
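The underlying issue, sketched with generic vector extensions (illustrative, not the testcase): with -funsigned-char a plain char element can never be negative, so the mask < 0 comparison the fold emits would become constant-false unless the mask is first view-converted to a signed element type.

/* Sketch: vpblendvb selects on each byte's most significant bit.  If
   'mask' keeps an unsigned element type, mask < 0 folds to all-zeros;
   reinterpreting it as signed char first preserves the MSB test.  */
typedef unsigned char v16uq __attribute__ ((vector_size (16)));
typedef signed char v16qs __attribute__ ((vector_size (16)));

v16uq blend (v16uq a, v16uq b, v16uq mask)
{
  v16qs smask = (v16qs) mask;   /* the VIEW_CONVERT_EXPR of the fix */
  return (smask < 0) ? b : a;   /* bytes with the MSB set pick b */
}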
View file
_service:tar_scm:0257-Make-option-mvzeroupper-independent-of-optimization-.patch
Added
@@ -0,0 +1,138 @@ +From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 09:50:25 +0800 +Subject: PATCH 03/28 Make option mvzeroupper independent of optimization + level. + +pass_insert_vzeroupper is under condition + +TARGET_AVX && TARGET_VZEROUPPER +&& flag_expensive_optimizations && !optimize_size + +But the document of mvzeroupper doesn't mention the insertion +required -O2 and above, it may confuse users when they explicitly +use -Os -mvzeroupper. + +------------ +mvzeroupper +Target Mask(VZEROUPPER) Save +Generate vzeroupper instruction before a transfer of control flow out of +the function. +------------ + +The patch moves flag_expensive_optimizations && !optimize_size to +ix86_option_override_internal. It makes -mvzeroupper independent of +optimization level, but still keeps the behavior of architecture +tuning(emit_vzeroupper) unchanged. + +gcc/ChangeLog: + + * config/i386/i386-features.cc (pass_insert_vzeroupper:gate): + Move flag_expensive_optimizations && !optimize_size to .. + * config/i386/i386-options.cc (ix86_option_override_internal): + .. this, it makes -mvzeroupper independent of optimization + level, but still keeps the behavior of architecture + tuning(emit_vzeroupper) unchanged. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx-vzeroupper-29.c: New testcase. + * gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase. + * gcc.target/i386/avx-vzeroupper-7.c: Ditto. + * gcc.target/i386/avx-vzeroupper-9.c: Ditto. +--- + gcc/config/i386/i386-features.cc | 3 +-- + gcc/config/i386/i386-options.cc | 4 +++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++ + gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++- + 6 files changed, 24 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c + +diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc +index 6fe41c3c2..6a2444eb6 100644 +--- a/gcc/config/i386/i386-features.cc ++++ b/gcc/config/i386/i386-features.cc +@@ -1875,8 +1875,7 @@ public: + /* opt_pass methods: */ + virtual bool gate (function *) + { +- return TARGET_AVX && TARGET_VZEROUPPER +- && flag_expensive_optimizations && !optimize_size; ++ return TARGET_AVX && TARGET_VZEROUPPER; + } + + virtual unsigned int execute (function *) +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index ff44ad4e0..74e969b68 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p, + sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); + + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) +- && TARGET_EMIT_VZEROUPPER) ++ && TARGET_EMIT_VZEROUPPER ++ && flag_expensive_optimizations ++ && !optimize_size) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!(opts_set->x_target_flags & MASK_STV)) + opts->x_target_flags |= MASK_STV; +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +index e694d4048..5a40e8783 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +@@ -16,5 +16,6 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times 
"avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ + /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +new file mode 100644 +index 000000000..4af637757 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */ ++ ++#include <immintrin.h> ++ ++extern __m256 x, y; ++ ++void ++foo () ++{ ++ x = y; ++} ++ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +index ab6d68779..75fe58897 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +@@ -12,4 +12,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +index 974e1626a..fa0a6dfca 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +@@ -15,4 +15,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ +-- +2.31.1 +
View file
_service:tar_scm:0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
Added
@@ -0,0 +1,68 @@ +From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Sun, 25 Jun 2023 09:50:21 +0800 +Subject: PATCH 04/28 i386: Sync tune_string with arch_string for target + attribute + +arch=* + +For function with target attribute arch=*, current logic will set its +tune to -mtune from command line so all target_clones will get same +tuning flags which would affect the performance for each clone. Override +tune with arch if tune was not explicitly specified to get proper tuning +flags for target_clones. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): + Override tune_string with arch_string if tune_string is not + explicitly specified. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/mvc17.c: New test. + +(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8) +--- + gcc/config/i386/i386-options.cc | 6 +++++- + gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 74e969b68..fb2ed942f 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, + if (option_stringsIX86_FUNCTION_SPECIFIC_TUNE) + opts->x_ix86_tune_string + = ggc_strdup (option_stringsIX86_FUNCTION_SPECIFIC_TUNE); +- else if (orig_tune_defaulted) ++ /* If we have explicit arch string and no tune string specified, set ++ tune_string to NULL and later it will be overriden by arch_string ++ so target clones can get proper optimization. */ ++ else if (option_stringsIX86_FUNCTION_SPECIFIC_ARCH ++ || orig_tune_defaulted) + opts->x_ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ +diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c +new file mode 100644 +index 000000000..8b83c1aec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/mvc17.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-ifunc "" } */ ++/* { dg-options "-O2 -march=x86-64" } */ ++/* { dg-final { scan-assembler-times "rep mov" 1 } } */ ++ ++__attribute__((target_clones("default","arch=icelake-server"))) ++void ++foo (char *a, char *b, int size) ++{ ++ __builtin_memcpy (a, b, size & 0x7F); ++} +-- +2.31.1 +
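The same logic applies to a plain target attribute, not just target_clones (a sketch): before the fix the variant below was tuned according to the command-line -mtune, typically generic; with it, tuning follows arch=icelake-server unless -mtune is given explicitly.

/* Sketch: this function is now both compiled and tuned for
   icelake-server, so the memcpy expansion can use the tuned strategy
   the new mvc17.c test checks for.  */
__attribute__((target ("arch=icelake-server")))
void fast_copy (char *a, const char *b, int n)
{
  __builtin_memcpy (a, b, n & 0x7F);
}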
View file
_service:tar_scm:0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
Added
@@ -0,0 +1,111 @@ +From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Tue, 20 Jun 2023 15:41:00 +0800 +Subject: PATCH 05/28 Refine maskloadmn pattern with UNSPEC_MASKLOAD. + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent +it to be transformed to vpblendd. + +gcc/ChangeLog: + + PR target/110309 + * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>): + Refine pattern with UNSPEC_MASKLOAD. + (maskload<mode><avx512fmaskmodelower>): Ditto. + (*<avx512>_load<mode>_mask): Extend mode iterator to + VI12HF_AVX512VL. + (*<avx512>_load<mode>): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110309.c: New test. +--- + gcc/config/i386/sse.md | 32 +++++++++++++----------- + gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++ + 2 files changed, 28 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index eb767e56c..b30e96cb1 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1411,12 +1411,12 @@ + }) + + (define_insn "*<avx512>_load<mode>_mask" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (vec_merge:VI12_AVX512VL +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD) +- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") ++ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk"))) + "TARGET_AVX512BW" + "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" +@@ -1425,9 +1425,9 @@ + (set_attr "mode" "<sseinsnmode>")) + + (define_insn_and_split "*<avx512>_load<mode>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD)) + "TARGET_AVX512BW" + "#" +@@ -25973,17 +25973,21 @@ + "TARGET_AVX") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "register_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "memory_operand") ++ (set (match_operand:V48_AVX512VL 0 "register_operand") ++ (vec_merge:V48_AVX512VL ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512F") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512BW") +diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c +new file mode 100644 +index 
000000000..f6e9e9c3c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr110309.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
++/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
++
++
++void foo (int * __restrict a, int *b)
++{
++  for (int i = 0; i < 6; ++i)
++    a[i] = b[i] + 42;
++}
+--
+2.31.1
+
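The hazard in intrinsic terms (a sketch, not the testcase): a masked load suppresses faults on inactive lanes, while a full-width load followed by vpblendd reads every lane, so the rewrite is only safe when all bytes are known accessible. The UNSPEC_MASKLOAD wrapper keeps the combiner from making that rewrite.

/* Compile with -mavx512f -mavx512vl.  Only b[0..5] need be
   accessible: the masked load never touches b[6..7], whereas a plain
   256-bit load plus blend would.  */
#include <immintrin.h>

__m256i safe_load6 (const int *b)
{
  __mmask8 m = 0x3f;                       /* low six lanes only */
  return _mm256_maskz_loadu_epi32 (m, b);  /* may read b[0..5] only */
}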
View file
_service:tar_scm:0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
Added
@@ -0,0 +1,126 @@ +From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 21:07:09 +0800 +Subject: PATCH 06/28 Refine maskstore patterns with UNSPEC_MASKMOV. + +Similar like r14-2070-gc79476da46728e + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent +it to be transformed to any other whole memory access instructions. + +gcc/ChangeLog: + + PR rtl-optimization/110237 + * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with + UNSPEC_MASKMOV. + (maskstore<mode><avx512fmaskmodelower): Ditto. + (*<avx512>_store<mode>_mask): New define_insn, it's renamed + from original <avx512>_store<mode>_mask. +--- + gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 57 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index b30e96cb1..3af159896 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1554,7 +1554,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") + (vec_merge:V48_AVX512VL + (match_operand:V48_AVX512VL 1 "register_operand" "v") +@@ -1582,7 +1582,7 @@ + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") + (vec_merge:VI12HF_AVX512VL + (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") +@@ -26002,21 +26002,66 @@ + "TARGET_AVX") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "memory_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:V48_AVX512VL 0 "memory_operand") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512F") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "memory_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512BW") + ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512F" ++{ ++ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++ else ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovdqu<ssescalarsize>\t{%1, 
%0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++} ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512BW" ++ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ + (define_expand "cbranch<mode>4" + (set (reg:CC FLAGS_REG) + (compare:CC (match_operand:VI48_AVX 1 "register_operand") +-- +2.31.1 +
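The store side has the same shape (a sketch, not part of the patch): a real masked store must not be expanded into a full-width read-modify-write, which could fault past the accessible region or race with another thread that owns the masked-off bytes.

/* Compile with -mavx512f -mavx512vl.  Lanes 6..7 are masked off;
   expanding this into a plain 256-bit load/blend/store would both
   read and write d[6..7].  */
#include <immintrin.h>

void safe_store6 (int *d, __m256i v)
{
  __mmask8 m = 0x3f;                     /* low six lanes only */
  _mm256_mask_storeu_epi32 (d, m, v);    /* writes d[0..5] only */
}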
View file
_service:tar_scm:0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
Added
@@ -0,0 +1,38 @@
+From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Thu, 29 Jun 2023 03:10:35 +0000
+Subject: [PATCH 07/28] x86: Update model values for Alderlake and Rocketlake.
+
+Update model values for Alderlake and Rocketlake according to SDM.
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
+	from Rocketlake, remove model value 0xbf from Alderlake.
+---
+ gcc/common/config/i386/cpuinfo.h | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 0333da56b..28b2ff0b0 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
+       break;
+     case 0xa7:
+-    case 0xa8:
+       /* Rocket Lake.  */
+       cpu = "rocketlake";
+       CHECK___builtin_cpu_is ("corei7");
+@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       break;
+     case 0x97:
+     case 0x9a:
+-    case 0xbf:
+       /* Alder Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+--
+2.31.1
+
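These model tables feed the __builtin_cpu_is machinery used by function multi-versioning; a quick sketch of a runtime check whose answer depends on the corrected list:

/* Sketch: after this change, model 0xa8 CPUs no longer report as
   "rocketlake" and model 0xbf no longer as "alderlake".  */
#include <stdio.h>

int main (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_is ("alderlake"))
    puts ("running on Alder Lake");
  else if (__builtin_cpu_is ("rocketlake"))
    puts ("running on Rocket Lake");
  return 0;
}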
View file
_service:tar_scm:0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
Added
@@ -0,0 +1,78 @@ +From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 4 Aug 2023 09:27:39 +0800 +Subject: PATCH 08/28 Workaround possible CPUID bug in Sandy Bridge. + +Don't access leaf 7 subleaf 1 unless subleaf 0 says it is +supported via EAX. + +Intel documentation says invalid subleaves return 0. We had been +relying on that behavior instead of checking the max sublef number. + +It appears that some Sandy Bridge CPUs return at least the subleaf 0 +EDX value for subleaf 1. Best guess is that this is a bug in a +microcode patch since all of the bits we're seeing set in EDX were +introduced after Sandy Bridge was originally released. + +This is causing avxvnniint16 to be incorrectly enabled with +-march=native on these CPUs. + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_available_features): Check + max_subleaf_level for valid subleaf before use CPUID. +--- + gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------ + 1 file changed, 17 insertions(+), 12 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 28b2ff0b0..316ad3cb3 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model, + /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */ + if (max_cpuid_level >= 7) + { +- __cpuid_count (7, 0, eax, ebx, ecx, edx); ++ unsigned int max_subleaf_level; ++ ++ __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx); + if (ebx & bit_BMI) + set_feature (FEATURE_BMI); + if (ebx & bit_SGX) +@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model, + set_feature (FEATURE_AVX512FP16); + } + +- __cpuid_count (7, 1, eax, ebx, ecx, edx); +- if (eax & bit_HRESET) +- set_feature (FEATURE_HRESET); +- if (avx_usable) +- { +- if (eax & bit_AVXVNNI) +- set_feature (FEATURE_AVXVNNI); +- } +- if (avx512_usable) ++ if (max_subleaf_level >= 1) + { +- if (eax & bit_AVX512BF16) +- set_feature (FEATURE_AVX512BF16); ++ __cpuid_count (7, 1, eax, ebx, ecx, edx); ++ if (eax & bit_HRESET) ++ set_feature (FEATURE_HRESET); ++ if (avx_usable) ++ { ++ if (eax & bit_AVXVNNI) ++ set_feature (FEATURE_AVXVNNI); ++ } ++ if (avx512_usable) ++ { ++ if (eax & bit_AVX512BF16) ++ set_feature (FEATURE_AVX512BF16); ++ } + } + } + +-- +2.31.1 +
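A sketch of the defensive pattern the patch adopts, using the <cpuid.h> helpers: leaf 7 subleaf 0 reports the maximum valid subleaf in EAX, and subleaf 1 is only read when that maximum is at least 1, instead of trusting invalid subleaves to return zeros.

/* Standalone illustration of the fixed logic.  */
#include <cpuid.h>
#include <stdio.h>

int main (void)
{
  unsigned int eax, ebx, ecx, edx;
  unsigned int max_subleaf;

  if (__get_cpuid_count (7, 0, &max_subleaf, &ebx, &ecx, &edx))
    {
      if (max_subleaf >= 1)
        {
          __cpuid_count (7, 1, eax, ebx, ecx, edx);
          printf ("leaf 7 subleaf 1: eax=%#x\n", eax);
        }
      else
        puts ("leaf 7 subleaf 1 not architecturally defined here");
    }
  return 0;
}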
View file
_service:tar_scm:0263-Software-mitigation-Disable-gather-generation-in-vec.patch
Added
@@ -0,0 +1,220 @@ +From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 10 Aug 2023 11:41:39 +0800 +Subject: PATCH 09/28 Software mitigation: Disable gather generation in + vectorization for GDS affected Intel Processors. + +For more details of GDS (Gather Data Sampling), refer to +https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html + +After microcode update, there's performance regression. To avoid that, +the patch disables gather generation in autovectorization but uses +gather scalar emulation instead. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (m_GDS): New macro. + * config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't + enable for m_GDS. + (X86_TUNE_USE_GATHER_4PARTS): Ditto. + (X86_TUNE_USE_GATHER): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx2-gather-2.c: Adjust options to keep + gather vectorization. + * gcc.target/i386/avx2-gather-6.c: Ditto. + * gcc.target/i386/avx512f-pr88464-1.c: Ditto. + * gcc.target/i386/avx512f-pr88464-5.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-1.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-11.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-3.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-9.c: Ditto. + * gcc.target/i386/pr88531-1b.c: Ditto. + * gcc.target/i386/pr88531-1c.c: Ditto. + +(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d) +--- + gcc/config/i386/i386-options.cc | 5 +++++ + gcc/config/i386/x86-tune.def | 9 ++++++--- + gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +- + gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +- + 12 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index fb2ed942f..9617fc162 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3. If not see + #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS) + #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT) + #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL) ++/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828. ++ Software mitigation. */ ++#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \ ++ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ ++ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE) + + #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE) + #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index e6b9e2125..4392709fc 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 + elements. 
*/ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. */ +@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", + /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ +@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. */ +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +index ad5ef7310..978924b0f 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */ ++/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */ + + #include "avx2-gather-1.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +index b9119581a..067b251e3 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */ ++/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */ + + #include "avx2-gather-5.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +index 06d21bb01..d1a229861 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +index 462e951fd..d7b0b2b28 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { 
dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +index 55a28dddb..07439185e 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +index 969600885..3a9810827 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +index 6b0c8a859..ac669e048 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +index 3af568ab3..14a1083b6 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c +index 812c8a10f..e6df789de 100644 +--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c ++++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
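The loop shape this tuning change affects, as a sketch (not one of the adjusted tests): an indexed load that previously vectorized to a hardware gather under Skylake-class tuning is now emulated element by element on the m_GDS processors listed above.

/* Sketch: with -O3 -mtune=skylake-avx512 this loop no longer uses
   vgatherdps; with -mtune=haswell, as the adjusted tests use, it
   still does.  */
void indexed_load (float *__restrict dst, const float *src,
                   const int *idx, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = src[idx[i]];
}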
View file
_service:tar_scm:0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
Added
@@ -0,0 +1,187 @@ +From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 10 Aug 2023 16:26:13 +0800 +Subject: PATCH 10/28 Support -mno-gather -mno-scatter to enable/disable + vectorization for all gather/scatter instructions + +Rename original use_gather to use_gather_8parts, Support +-mtune-ctrl={,^}use_gather to set/clear tune features +use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather +as alias of -mtune-ctrl=, use_gather, ^use_gather. + +Similar for use_scatter. + +gcc/ChangeLog: + + * config/i386/i386-builtins.cc + (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts. + * config/i386/i386-options.cc (parse_mtune_ctrl_str): + Set/Clear tune features use_{gather,scatter}_{2parts, 4parts, + 8parts} for -mtune-crtl={,^}{use_gather,use_scatter}. + * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust + for use_scatter_8parts + * config/i386/i386.h (TARGET_USE_GATHER): Rename to .. + (TARGET_USE_GATHER_8PARTS): .. this. + (TARGET_USE_SCATTER): Rename to .. + (TARGET_USE_SCATTER_8PARTS): .. this. + * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to + (X86_TUNE_USE_GATHER_8PARTS): .. this. + (X86_TUNE_USE_SCATTER): Rename to + (X86_TUNE_USE_SCATTER_8PARTS): .. this. + * config/i386/i386.opt: Add new options mgather, mscatter. + +(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737) +--- + gcc/config/i386/i386-builtins.cc | 2 +- + gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++--------- + gcc/config/i386/i386.cc | 2 +- + gcc/config/i386/i386.h | 8 ++--- + gcc/config/i386/i386.opt | 4 +++ + gcc/config/i386/x86-tune.def | 4 +-- + 6 files changed, 52 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc +index 050c6228a..8ed32e14f 100644 +--- a/gcc/config/i386/i386-builtins.cc ++++ b/gcc/config/i386/i386-builtins.cc +@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, + ? !TARGET_USE_GATHER_2PARTS + : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u) + ? !TARGET_USE_GATHER_4PARTS +- : !TARGET_USE_GATHER))) ++ : !TARGET_USE_GATHER_8PARTS))) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 9617fc162..3df1f0c41 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump) + curr_feature_string++; + clear = true; + } +- for (i = 0; i < X86_TUNE_LAST; i++) +- { +- if (!strcmp (curr_feature_string, ix86_tune_feature_namesi)) +- { +- ix86_tune_featuresi = !clear; +- if (dump) +- fprintf (stderr, "Explicitly %s feature %s\n", +- clear ? "clear" : "set", ix86_tune_feature_namesi); +- break; +- } +- } +- if (i == X86_TUNE_LAST) +- error ("unknown parameter to option %<-mtune-ctrl%>: %s", +- clear ? curr_feature_string - 1 : curr_feature_string); ++ ++ if (!strcmp (curr_feature_string, "use_gather")) ++ { ++ ix86_tune_featuresX86_TUNE_USE_GATHER_2PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_GATHER_4PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_GATHER_8PARTS = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s features use_gather_2parts," ++ " use_gather_4parts, use_gather_8parts\n", ++ clear ? 
"clear" : "set"); ++ ++ } ++ else if (!strcmp (curr_feature_string, "use_scatter")) ++ { ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_2PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_4PARTS = !clear; ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_8PARTS = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s features use_scatter_2parts," ++ " use_scatter_4parts, use_scatter_8parts\n", ++ clear ? "clear" : "set"); ++ } ++ else ++ { ++ for (i = 0; i < X86_TUNE_LAST; i++) ++ { ++ if (!strcmp (curr_feature_string, ix86_tune_feature_namesi)) ++ { ++ ix86_tune_featuresi = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s feature %s\n", ++ clear ? "clear" : "set", ix86_tune_feature_namesi); ++ break; ++ } ++ } ++ ++ if (i == X86_TUNE_LAST) ++ error ("unknown parameter to option %<-mtune-ctrl%>: %s", ++ clear ? curr_feature_string - 1 : curr_feature_string); ++ } + curr_feature_string = next_feature_string; + } + while (curr_feature_string); +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 479fc6010..e75d37023 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype, + ? !TARGET_USE_SCATTER_2PARTS + : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) + ? !TARGET_USE_SCATTER_4PARTS +- : !TARGET_USE_SCATTER)) ++ : !TARGET_USE_SCATTER_8PARTS)) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 688aaabd3..aaa136ba0 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_featuresX86_TUNE_LAST; + ix86_tune_featuresX86_TUNE_USE_GATHER_4PARTS + #define TARGET_USE_SCATTER_4PARTS \ + ix86_tune_featuresX86_TUNE_USE_SCATTER_4PARTS +-#define TARGET_USE_GATHER \ +- ix86_tune_featuresX86_TUNE_USE_GATHER +-#define TARGET_USE_SCATTER \ +- ix86_tune_featuresX86_TUNE_USE_SCATTER ++#define TARGET_USE_GATHER_8PARTS \ ++ ix86_tune_featuresX86_TUNE_USE_GATHER_8PARTS ++#define TARGET_USE_SCATTER_8PARTS \ ++ ix86_tune_featuresX86_TUNE_USE_SCATTER_8PARTS + #define TARGET_FUSE_CMP_AND_BRANCH_32 \ + ix86_tune_featuresX86_TUNE_FUSE_CMP_AND_BRANCH_32 + #define TARGET_FUSE_CMP_AND_BRANCH_64 \ +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index 498fb454d..b154110d8 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated. + munroll-only-small-loops + Target Var(ix86_unroll_only_small_loops) Init(0) Save + Enable conservative small loop unrolling. ++ ++mscatter ++Target Alias(mtune-ctrl=, use_scatter, ^use_scatter) ++Enable vectorization for scatter instruction. +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 4392709fc..bdb455d20 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ +-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", ++DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE + | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. 
*/ +-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter", ++DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", + ~(m_ZNVER4)) + + /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or +-- +2.31.1 +
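Usage sketch (option spellings as described in the ChangeLog above): the new aliases flip all three use_{gather,scatter}_{2,4,8}parts tuning flags at once, so mitigation-aware users can opt back in or out explicitly.

/* Sketch: build with
     gcc -O3 -march=skylake-avx512 -mtune-ctrl=use_gather scatter.c
   (or the -mgather alias per the ChangeLog) to re-enable gather
   vectorization despite the GDS tuning default; -mno-scatter likewise
   disables scatter generation everywhere.  */
void scatter_store (float *dst, const int *idx,
                    const float *__restrict src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[idx[i]] = src[i];
}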
View file
_service:tar_scm:0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
Added
@@ -0,0 +1,129 @@ +From 764518a35e90a3e13c469275da9c3c7002fe1982 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 8 Sep 2023 09:22:43 +0800 +Subject: PATCH 11/28 Remove constraint modifier % for + fcmaddcph/fmaddcph/fcmulcph since there're not commutative. + +gcc/ChangeLog: + + PR target/111306 + PR target/111335 + * config/i386/sse.md (int_comm): New int_attr. + (fma_<complexopname>_<mode><sdc_maskz_name><round_name>): + Remove % for Complex conjugate operations since they're not + commutative. + (fma_<complexpairopname>_<mode>_pair): Ditto. + (<avx512>_<complexopname>_<mode>_mask<round_name>): Ditto. + (cmul<conj_op><mode>3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr111306.c: New test. + +(cherry picked from commit f197392a16ffb1327f1d12ff8ff05f9295e015cb) +--- + gcc/config/i386/sse.md | 16 ++++++++--- + gcc/testsuite/gcc.target/i386/pr111306.c | 36 ++++++++++++++++++++++++ + 2 files changed, 48 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 3af159896..f25dd5f2b 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -6318,6 +6318,14 @@ + (UNSPEC_COMPLEX_FMA_PAIR "fmaddc") + (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")) + ++(define_int_attr int_comm ++ (UNSPEC_COMPLEX_FMA "") ++ (UNSPEC_COMPLEX_FMA_PAIR "") ++ (UNSPEC_COMPLEX_FCMA "") ++ (UNSPEC_COMPLEX_FCMA_PAIR "") ++ (UNSPEC_COMPLEX_FMUL "%") ++ (UNSPEC_COMPLEX_FCMUL "")) ++ + (define_int_attr conj_op + (UNSPEC_COMPLEX_FMA "") + (UNSPEC_COMPLEX_FCMA "_conj") +@@ -6431,7 +6439,7 @@ + (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>" + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>") + (match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0") + UNSPEC_COMPLEX_F_C_MA)) +@@ -6495,7 +6503,7 @@ + (define_insn "fma_<complexpairopname>_<mode>_pair" + (set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v") + (unspec:VF1_AVX512VL +- (match_operand:VF1_AVX512VL 1 "vector_operand" "%v") ++ (match_operand:VF1_AVX512VL 1 "vector_operand" "<int_comm>v") + (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr") + (match_operand:VF1_AVX512VL 3 "vector_operand" "0") + UNSPEC_COMPLEX_F_C_MA_PAIR)) +@@ -6562,7 +6570,7 @@ + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (vec_merge:VF_AVX512FP16VL + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>") + (match_operand:VF_AVX512FP16VL 3 "register_operand" "0") + UNSPEC_COMPLEX_F_C_MA) +@@ -6586,7 +6594,7 @@ + (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>" + (set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") + (unspec:VF_AVX512FP16VL +- (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") ++ (match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v") + (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>") + UNSPEC_COMPLEX_F_C_MUL)) + "TARGET_AVX512FP16 && <round_mode512bit_condition>" +diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c 
b/gcc/testsuite/gcc.target/i386/pr111306.c
+new file mode 100644
+index 000000000..541725ebd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr111306.c
+@@ -0,0 +1,36 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
++/* { dg-require-effective-target avx512fp16 } */
++
++#define AVX512FP16
++#include "avx512f-helper.h"
++
++__attribute__((optimize("O2"),noipa))
++void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
++  __m512h rA = _mm512_loadu_ph(a);
++  for (int i = 0; i < n; i += 32) {
++    __m512h rB = _mm512_loadu_ph(b + i);
++    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
++  }
++}
++
++void
++test_512 (void)
++{
++  int n = 32;
++  _Float16 a[n], b[n], c[n];
++  _Float16 exp[n];
++  for (int i = 1; i <= n; i++) {
++    a[i - 1] = i & 1 ? -i : i;
++    b[i - 1] = i;
++  }
++
++  func1(a, b, n, c);
++  for (int i = 0; i < n / 32; i += 2) {
++    if (c[i] != a[i] * b[i] + a[i+1] * b[i+1]
++	|| c[i+1] != a[i] * b[i+1] - a[i+1]*b[i])
++      __builtin_abort ();
++  }
++}
++
++
+--
+2.31.1
+
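A worked check of why the conjugate forms must not carry the '%' commutativity modifier: swapping the operands of a * conj(b) conjugates the result, so the two orders are genuinely different values. Standalone sketch (not the testcase):

/* For a = 1+2i and b = 3+4i:
     a * conj(b) = 11 + 2i,  conj(a) * b = 11 - 2i,
   so letting the register allocator swap operands 1 and 2 of
   fcmulcph would silently negate the imaginary part.  */
#include <complex>
#include <cstdio>

int main ()
{
  std::complex<float> a (1.0f, 2.0f), b (3.0f, 4.0f);
  std::complex<float> p = a * std::conj (b);
  std::complex<float> q = std::conj (a) * b;
  std::printf ("a*conj(b) = (%g, %g)\n", p.real (), p.imag ());
  std::printf ("conj(a)*b = (%g, %g)\n", q.real (), q.imag ());
  return 0;
}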
View file
_service:tar_scm:0266-Disparage-slightly-for-the-alternative-which-move-DF.patch
Added
@@ -0,0 +1,106 @@ +From afd539adfe762adb57863299a11987b7e20e7987 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Wed, 5 Jul 2023 13:45:11 +0800 +Subject: PATCH 12/28 Disparage slightly for the alternative which move + DFmode between SSE_REGS and GENERAL_REGS. + +For testcase + +void __cond_swap(double* __x, double* __y) { + bool __r = (*__x < *__y); + auto __tmp = __r ? *__x : *__y; + *__y = __r ? *__y : *__x; + *__x = __tmp; +} + +GCC-14 with -O2 and -march=x86-64 options generates the following code: + +__cond_swap(double*, double*): + movsd xmm1, QWORD PTR rdi + movsd xmm0, QWORD PTR rsi + comisd xmm0, xmm1 + jbe .L2 + movq rax, xmm1 + movapd xmm1, xmm0 + movq xmm0, rax +.L2: + movsd QWORD PTR rsi, xmm1 + movsd QWORD PTR rdi, xmm0 + ret + +rax is used to save and restore DFmode value. In RA both GENERAL_REGS +and SSE_REGS cost zero since we didn't disparage the +alternative in movdf_internal pattern, according to register +allocation order, GENERAL_REGS is allocated. The patch add ? for +alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal +pattern, after that we get optimal RA. + +__cond_swap: +.LFB0: + .cfi_startproc + movsd (%rdi), %xmm1 + movsd (%rsi), %xmm0 + comisd %xmm1, %xmm0 + jbe .L2 + movapd %xmm1, %xmm2 + movapd %xmm0, %xmm1 + movapd %xmm2, %xmm0 +.L2: + movsd %xmm1, (%rsi) + movsd %xmm0, (%rdi) + ret + +gcc/ChangeLog: + + PR target/110170 + * config/i386/i386.md (movdf_internal): Disparage slightly for + 2 alternatives (r,v) and (v,r) by adding constraint modifier + '?'. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110170-3.c: New test. + +(cherry picked from commit 37a231cc7594d12ba0822077018aad751a6fb94e) +--- + gcc/config/i386/i386.md | 4 ++-- + gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++ + 2 files changed, 13 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index be07be10d..71691f598 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -3582,9 +3582,9 @@ + ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7. + (define_insn "*movdf_internal" + (set (match_operand:DF 0 "nonimmediate_operand" +- "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m") ++ "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m") + (match_operand:DF 1 "general_operand" +- "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC")) ++ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC")) + "!(MEM_P (operands0) && MEM_P (operands1)) + && (lra_in_progress || reload_completed + || !CONST_DOUBLE_P (operands1) +diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c +new file mode 100644 +index 000000000..70daa89e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */ ++/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */ ++ ++void __cond_swap(double* __x, double* __y) { ++ _Bool __r = (*__x < *__y); ++ double __tmp = __r ? *__x : *__y; ++ *__y = __r ? *__y : *__x; ++ *__x = __tmp; ++} ++ +-- +2.31.1 +
_service:tar_scm:0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
Added
@@ -0,0 +1,163 @@
+From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 9 Nov 2023 13:20:05 +0800
+Subject: [PATCH 13/28] Fix wrong code due to vec_merge + pcmp to blendvb
+ splitter.
+
+gcc/ChangeLog:
+
+	PR target/112443
+	* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
+	from LT to GT since there's no NOT in the pattern.
+	(*avx2_pcmp<mode>3_5): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/pr112443.C: New test.
+
+(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
+---
+ gcc/config/i386/sse.md                   |   4 ++--
+ gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
+ 2 files changed, 110 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
+
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index f25dd5f2b..23b858ab2 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -16358,7 +16358,7 @@
+ 	     (match_dup 4))
+ 	  UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+   operands[3] = gen_lowpart (<MODE>mode, operands[3]);
+ })
+@@ -16388,7 +16388,7 @@
+ 	     (match_dup 4))
+ 	  UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+ })
+
+diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
+new file mode 100644
+index 000000000..ebfa9b4a7
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/pr112443.C
+@@ -0,0 +1,108 @@
++/* { dg-do run } */
++/* { dg-require-effective-target avx512bw } */
++/* { dg-require-effective-target avx512vl } */
++/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
++
++#include <cstdint>
++#include <x86intrin.h>
++#include <functional>
++#include <ostream>
++
++#define AVX512BW
++#define AVX512VL
++
++#include "avx512f-helper.h"
++
++struct TensorIteratorBase{
++  char* in;
++  char* out;
++
++  void for_each(std::function<void(char*, char*, int64_t size)> loop){
++    loop(out, in, 32);
++  }
++};
++
++class Vectorized {
++protected:
++  __m256i values;
++
++  static inline __m256i invert(const __m256i& v) {
++    const auto ones = _mm256_set1_epi64x(-1);
++    return _mm256_xor_si256(ones, v);
++  }
++public:
++  operator __m256i() const {
++    return values;
++  }
++
++  static constexpr int size() {
++    return 32;
++  }
++
++  Vectorized() {}
++  Vectorized(__m256i v) : values(v) {}
++  Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
++  static Vectorized blendv(const Vectorized& a, const Vectorized& b,
++			   const Vectorized& mask) {
++    return _mm256_blendv_epi8(a, b, mask);
++  }
++  static Vectorized loadu(const void* ptr) {
++    return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
++  }
++  void store(void* ptr) const {
++    _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
++  }
++
++  Vectorized operator<(const Vectorized& other) const {
++    __m256i max = _mm256_max_epu8(values, other);
++    return invert(_mm256_cmpeq_epi8(max, values));
++  }
++  Vectorized operator-(const Vectorized& b) {
++    return _mm256_sub_epi8(values, b);
++  }
++};
++
++std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
++  uint8_t buf[Vectorized::size()];
++  vec.store(buf);
++  stream << "vec[";
++  for (int i = 0; i != Vectorized::size(); i++) {
++    if (i != 0)
++      stream << ", ";
++    stream << buf[i]*1;
++  }
++  stream << "]";
++  return stream;
++}
++
++void run(TensorIteratorBase iter){
++  Vectorized zero_vec(0);
++  Vectorized one_vec(1);
++
++  iter.for_each([=](char* out, char* in, int64_t size) {
++    for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
++      auto self_vec = Vectorized::loadu(in + i);
++      auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
++      auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
++      auto outv = left - right;
++      outv.store(out + i);
++    }
++  });
++}
++
++void
++test_256 (){
++  char in[32];
++  char out[32];
++  for(auto& x: in) x = 1;
++  run(TensorIteratorBase{in, out});
++  Vectorized::loadu (out);
++  for (int i = 0; i != 32; i++)
++    if (out[i] != 1)
++      __builtin_abort ();
++}
++
++void
++test_128 ()
++{
++}
+--
+2.31.1
+
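Note: vpblendvb selects each byte from its second source when the corresponding mask byte's sign bit is set, so the test's vector loop computes the byte-wise sign function. A scalar model of one lane (illustrative name, not part of the patch):

  /* Scalar model of the pr112443.C loop: two blends build 0/1 masks,
     and left - right is sign(x); for x = 1 the result must be 1.  */
  static signed char
  sign_ref (signed char x)
  {
    signed char left  = (0 < x) ? 1 : 0;   /* blendv (zero, one, 0 < x) */
    signed char right = (x < 0) ? 1 : 0;   /* blendv (zero, one, x < 0) */
    return (signed char) (left - right);
  }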
_service:tar_scm:0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
Added
@@ -0,0 +1,151 @@
+From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 7 Dec 2023 09:17:27 +0800
+Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose
+ abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS.
+
+If the function doesn't clobber any SSE registers, or only clobbers the
+128-bit part, then vzeroupper isn't issued before the function exit:
+the status is not CLEAN but ANY after the function.
+
+Also, for a sibling call it's safe to issue a vzeroupper, and a
+vzeroupper could otherwise be missing since there's no mode_exit for
+sibling_call_p.
+
+gcc/ChangeLog:
+
+	PR target/112891
+	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
+	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
+	align with ix86_avx_u128_mode_needed.
+	(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
+	sibling_call.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/i386/pr112891.c: New test.
+	* gcc.target/i386/pr112891-2.c: New test.
+
+(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
+---
+ gcc/config/i386/i386.cc                    | 22 +++++++++++++---
+ gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
+ gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++
+ 3 files changed, 78 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
+
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index e75d37023..60f3296b0 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
+ 	 modes wider than 256 bits.  It's only safe to issue a
+ 	 vzeroupper if all SSE registers are clobbered.  */
+       const function_abi &abi = insn_callee_abi (insn);
+-      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+-				  abi.mode_clobbers (V4DImode)))
++      /* Should be safe to issue a vzeroupper before sibling_call_p.
++	 Also there is no mode_exit for sibling_call, so there could be
++	 a missing vzeroupper for that.  */
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
+ 	return AVX_U128_ANY;
+
+       return AVX_U128_CLEAN;
+@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
+       bool avx_upper_reg_found = false;
+       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
+
+-      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
++      if (avx_upper_reg_found)
++	return AVX_U128_DIRTY;
++
++      /* If the function doesn't clobber any sse registers or only clobbers
++	 the 128-bit part, then vzeroupper isn't issued before the function
++	 exit: the status is not CLEAN but ANY after the function.  */
++      const function_abi &abi = insn_callee_abi (insn);
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
++	return AVX_U128_ANY;
++
++      return AVX_U128_CLEAN;
+     }
+
+   /* Otherwise, return current mode.  Remember that if insn
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+new file mode 100644
+index 000000000..164c3985d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++double
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++  return a[5] + b[5];
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  double c = foo (a, b);
++  return __builtin_exp (c);
++}
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
+new file mode 100644
+index 000000000..dbf6c6794
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++void
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  foo (a, b);
++  return __builtin_exp (b[1]);
++}
+--
+2.31.1
+
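Note: the point of the fix is that after a call whose ABI does not clobber all SSE registers, the upper-128-bit state is unknown, so mode_after must report ANY rather than CLEAN. A sketch of the kind of code affected (the special ABI on bar is an assumption chosen for illustration):

  #include <immintrin.h>

  /* Assume bar uses an ABI that preserves vector registers, e.g.
     no_caller_saved_registers, so a call to it neither cleans nor
     necessarily dirties the YMM uppers.  */
  extern void bar (void) __attribute__ ((no_caller_saved_registers));

  void
  foo (float *p)
  {
    __m256 v = _mm256_loadu_ps (p);   /* uppers become dirty */
    _mm256_storeu_ps (p, v);
    bar ();                           /* state afterwards: ANY, not CLEAN */
  }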
_service:tar_scm:0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
Added
@@ -0,0 +1,142 @@
+From 19ee37b11702c86d7ed271e9e1d00e23cc4ab93c Mon Sep 17 00:00:00 2001
+From: Jan Hubicka <jh@suse.cz>
+Date: Fri, 29 Dec 2023 23:51:03 +0100
+Subject: [PATCH 15/28] Disable FMADD in chains for Zen4 and generic
+
+This patch disables use of FMA in the matrix multiplication loop for
+generic (for x86-64-v3) and zen4.  I tested this on zen4 and a Xeon
+Gold 6212U.
+
+For Intel this is neutral both on the matrix multiplication
+microbenchmark (attached) and spec2k17, where the difference was within
+noise for Core.
+
+On Core the micro-benchmark runs as follows:
+
+With FMA:
+
+     578,500,241      cycles:u        #  3.645 GHz               ( +-  0.12% )
+     753,318,477      instructions:u  #  1.30  insn per cycle    ( +-  0.00% )
+     125,417,701      branches:u      #  790.227 M/sec           ( +-  0.00% )
+        0.159146 +- 0.000363 seconds time elapsed  ( +-  0.23% )
+
+No FMA:
+
+     577,573,960      cycles:u        #  3.514 GHz               ( +-  0.15% )
+     878,318,479      instructions:u  #  1.52  insn per cycle    ( +-  0.00% )
+     125,417,702      branches:u      #  763.035 M/sec           ( +-  0.00% )
+        0.164734 +- 0.000321 seconds time elapsed  ( +-  0.19% )
+
+So the cycle count is unchanged, and a discrete multiply+add takes the
+same time as FMA.
+
+While on Zen:
+
+With FMA:
+      484875179      cycles:u         #  3.599 GHz               ( +-  0.05% ) (82.11%)
+      752031517      instructions:u   #  1.55  insn per cycle
+      125106525      branches:u       #  928.712 M/sec           ( +-  0.03% ) (85.09%)
+         128356      branch-misses:u  #  0.10% of all branches   ( +-  0.06% ) (83.58%)
+
+No FMA:
+      375875209      cycles:u         #  3.592 GHz               ( +-  0.08% ) (80.74%)
+      875725341      instructions:u   #  2.33  insn per cycle
+      124903825      branches:u       #  1.194 G/sec             ( +-  0.04% ) (84.59%)
+       0.105203 +- 0.000188 seconds time elapsed  ( +-  0.18% )
+
+The difference is that Core understands the fact that fmadd does not
+need all three parameters to start computation, while Zen cores don't.
+
+Since this seems a noticeable win on Zen and not a loss on Core, it
+seems like a good default for generic.
+
+float a[SIZE][SIZE];
+float b[SIZE][SIZE];
+float c[SIZE][SIZE];
+
+void init(void)
+{
+   int i, j, k;
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         a[i][j] = (float)i + j;
+         b[i][j] = (float)i - j;
+         c[i][j] = 0.0f;
+      }
+   }
+}
+
+void mult(void)
+{
+   int i, j, k;
+
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         for(k=0; k<SIZE; ++k)
+         {
+            c[i][j] += a[i][k] * b[k][j];
+         }
+      }
+   }
+}
+
+int main(void)
+{
+   clock_t s, e;
+
+   init();
+   s=clock();
+   mult();
+   e=clock();
+   printf(" mult took %10d clocks\n", (int)(e-s));
+
+   return 0;
+
+}
+
+gcc/ChangeLog:
+
+	* config/i386/x86-tune.def (X86_TUNE_AVOID_128FMA_CHAINS,
+	X86_TUNE_AVOID_256FMA_CHAINS): Enable for znver4 and generic.
+---
+ gcc/config/i386/x86-tune.def | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
+index bdb455d20..fd095f3ec 100644
+--- a/gcc/config/i386/x86-tune.def
++++ b/gcc/config/i386/x86-tune.def
+@@ -499,12 +499,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
+
+ /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
+    smaller FMA chain.  */
+-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
++DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2
++	  | m_ZNVER3 | m_ZNVER4 | m_GENERIC)
+
+ /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
+    smaller FMA chain.  */
+ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3
+-	  | m_ALDERLAKE | m_SAPPHIRERAPIDS)
++	  | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_GENERIC)
+
+ /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
+    smaller FMA chain.  */
+--
+2.31.1
+
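Note: the tuning targets loops where the accumulator forms a dependency chain. With FMA, each iteration's fused operation waits the full FMA latency on the previous one; with a separate multiply and add, the multiplies run ahead and only the add stays on the critical path. A minimal example of such a chain (illustrative, not from the patch):

  /* With X86_TUNE_AVOID_128FMA_CHAINS active (e.g. -mtune=znver4 after
     this patch), GCC should prefer separate mul+add here instead of a
     fused fma, shortening the loop-carried dependency to the add alone.  */
  float
  dot_chain (const float *a, const float *b, int n)
  {
    float c = 0.0f;
    for (int i = 0; i < n; i++)
      c = a[i] * b[i] + c;   /* accumulator chain through c */
    return c;
  }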
_service:tar_scm:0270-Initial-Raptorlake-Support.patch
Added
@@ -0,0 +1,47 @@
+From 411d1f0bcc0d1c8018fdf5fe84ad2404929556ec Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 16 Sep 2022 13:59:01 +0800
+Subject: [PATCH 16/28] Initial Raptorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Raptorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Raptorlake.
+
+(cherry picked from commit 470a0659b508d684148f362c4dc0eccf5a83a23e)
+---
+ gcc/common/config/i386/cpuinfo.h      | 2 ++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 316ad3cb3..13d0f4cd8 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -508,6 +508,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
+     case 0x97:
+     case 0x9a:
+       /* Alder Lake.  */
++    case 0xb7:
++      /* Raptor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index f650e255f..c1d700f89 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1939,6 +1939,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
+   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
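Note: because Raptor Lake reuses PROCESSOR_ALDERLAKE and the INTEL_COREI7_ALDERLAKE subtype, runtime dispatch on such parts keys on the "alderlake" name. A small check (sketch; requires a compiler with this patch applied):

  int
  running_on_alderlake_class (void)
  {
    __builtin_cpu_init ();
    /* True on Alder Lake and, with this patch, on Raptor Lake too.  */
    return __builtin_cpu_is ("alderlake");
  }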
_service:tar_scm:0271-Initial-Meteorlake-Support.patch
Added
@@ -0,0 +1,49 @@
+From 87cea29ede520f4a5af01dff7071ab1d23bd47b5 Mon Sep 17 00:00:00 2001
+From: "Hu, Lin1" <lin1.hu@intel.com>
+Date: Fri, 16 Sep 2022 11:25:13 +0800
+Subject: [PATCH 17/28] Initial Meteorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Meteorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Meteorlake.
+
+(cherry picked from commit fd206f0e95fb6f41b96eaaaab1dc0c30378e5e08)
+---
+ gcc/common/config/i386/cpuinfo.h      | 4 ++++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 13d0f4cd8..37af92d6b 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -510,6 +510,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       /* Alder Lake.  */
+     case 0xb7:
+       /* Raptor Lake.  */
++    case 0xb5:
++    case 0xaa:
++    case 0xac:
++      /* Meteor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c1d700f89..cfee672fb 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1941,6 +1941,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
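Note: the case values matched above (0xb5, 0xaa, 0xac, ...) are the display model from CPUID leaf 1, i.e. the base model in bits 7:4 extended by bits 19:16 for family 6. A sketch of the standard decoding (not code from the patch):

  #include <cpuid.h>
  #include <stdio.h>

  int
  main (void)
  {
    unsigned eax, ebx, ecx, edx;
    if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
      return 1;
    unsigned family = (eax >> 8) & 0xf;
    unsigned model = (eax >> 4) & 0xf;
    if (family == 0x6 || family == 0xf)
      model += ((eax >> 16) & 0xf) << 4;   /* extended model bits */
    printf ("family 0x%x, model 0x%x\n", family, model);
    return 0;
  }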
_service:tar_scm:0272-Support-Intel-AMX-FP16-ISA.patch
Added
@@ -0,0 +1,691 @@
+From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001
+From: Hongyu Wang <hongyu.wang@intel.com>
+Date: Fri, 4 Nov 2022 15:50:55 +0800
+Subject: [PATCH 18/28] Support Intel AMX-FP16 ISA
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features): Detect
+	amx-fp16.
+	* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
+	OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
+	(ix86_handle_option): Handle -mamx-fp16.
+	* common/config/i386/i386-cpuinfo.h (enum processor_features):
+	Add FEATURE_AMX_FP16.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
+	amx-fp16.
+	* config.gcc: Add amxfp16intrin.h.
+	* config/i386/cpuid.h (bit_AMX_FP16): New.
+	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
+	__AMX_FP16__.
+	* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
+	* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
+	(ix86_valid_target_attribute_inner_p): Add new ATTR.
+	(ix86_option_override_internal): Handle AMX-FP16.
+	* config/i386/i386.opt: Add -mamx-fp16.
+	* config/i386/immintrin.h: Include amxfp16intrin.h.
+	* doc/extend.texi: Document -mamx-fp16.
+	* doc/invoke.texi: Document amx-fp16.
+	* doc/sourcebuild.texi: Document amx_fp16.
+	* config/i386/amxfp16intrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mamx-fp16.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/sse-12.c: Ditto.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-14.c: Ditto.
+	* gcc.target/i386/sse-22.c: Ditto.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* lib/target-supports.exp: (check_effective_target_amx_fp16):
+	New proc.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/amx-check.h: Add AMX_FP16.
+	* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
+	* gcc.target/i386/amxfp16-asmatt-1.c: New test.
+	* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
+	* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
+
+Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
+
+(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226)
+---
+ gcc/common/config/i386/cpuinfo.h              |  5 ++
+ gcc/common/config/i386/i386-common.cc         | 15 +++++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  1 +
+ gcc/config.gcc                                |  3 +-
+ gcc/config/i386/amxfp16intrin.h               | 46 ++++++++++++++
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.opt                      |  4 ++
+ gcc/config/i386/immintrin.h                   |  2 +
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  9 ++-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
+ gcc/testsuite/gcc.target/i386/amx-helper.h    | 61 +++++++++++++++++++
+ .../gcc.target/i386/amxfp16-asmatt-1.c        | 13 ++++
+ .../gcc.target/i386/amxfp16-asmintel-1.c      | 10 +++
+ .../gcc.target/i386/amxfp16-dpfp16ps-2.c      | 57 +++++++++++++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
+ gcc/testsuite/lib/target-supports.exp         | 11 ++++
+ 29 files changed, 262 insertions(+), 13 deletions(-)
+ create mode 100644 gcc/config/i386/amxfp16intrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 37af92d6b..5951a30aa 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model,
+ 	    set_feature (FEATURE_AVX512BF16);
+ 	}
+     }
++      if (amx_usable)
++	{
++	  if (eax & bit_AMX_FP16)
++	    set_feature (FEATURE_AMX_FP16);
++	}
+     }
+
+   /* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1).  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index cfee672fb..922db33ee 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
+ #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
++#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_KL_UNSET \
+   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
++#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mamx_fp16:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 82996ebb3..8f22897de 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -240,6 +240,7 @@ enum processor_features
+   FEATURE_X86_64_V2,
+   FEATURE_X86_64_V3,
+   FEATURE_X86_64_V4,
++  FEATURE_AMX_FP16,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 2d0646a68..95bab6da2 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
++  ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 4a0ae9328..e2b4a23dc 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+-		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
++		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
++		       amxfp16intrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h
+new file mode 100644
+index 000000000..6a114741a
+--- /dev/null
++++ b/gcc/config/i386/amxfp16intrin.h
+@@ -0,0 +1,46 @@
++/* Copyright (C) 2020 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
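Note: for orientation, AMX-FP16 adds tdpfp16ps, exposed by the new amxfp16intrin.h as _tile_dpfp16ps, which accumulates FP16 dot-product pairs into an FP32 tile. A compile-only sketch (tile configuration and the Linux XSTATE permission request are omitted, so this is not runnable as-is on real hardware):

  /* Build with: gcc -O2 -mamx-tile -mamx-fp16 -c amx_fp16_sketch.c  */
  #include <immintrin.h>

  void
  amx_fp16_kernel (void)
  {
    /* ... _tile_loadconfig () and tile loads would go here ... */
    _tile_dpfp16ps (0, 1, 2);   /* tmm0 += tmm1 . tmm2 (FP16 pairs -> FP32) */
    _tile_release ();
  }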
_service:tar_scm:0273-Support-Intel-prefetchit0-t1.patch
Added
@@ -0,0 +1,902 @@
+From 42a38c8abaa28f67e26b9af3f434fe0107894e7d Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 4 Nov 2022 15:01:05 +0800
+Subject: [PATCH 19/28] Support Intel prefetchit0/t1
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features):
+	Detect PREFETCHI.
+	* common/config/i386/i386-common.cc
+	(OPTION_MASK_ISA2_PREFETCHI_SET,
+	OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
+	(ix86_handle_option): Handle -mprefetchi.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_features): Add FEATURE_PREFETCHI.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY
+	for prefetchi.
+	* config.gcc: Add prfchiintrin.h.
+	* config/i386/cpuid.h (bit_PREFETCHI): New.
+	* config/i386/i386-builtin-types.def:
+	Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
+	and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
+	* config/i386/i386-builtin.def (BDESC): Add new builtins.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Define __PREFETCHI__.
+	* config/i386/i386-expand.cc: Handle new builtins.
+	* config/i386/i386-isa.def (PREFETCHI):
+	Add DEF_PTA(PREFETCHI).
+	* config/i386/i386-options.cc
+	(ix86_valid_target_attribute_inner_p): Handle prefetchi.
+	* config/i386/i386.md (prefetchi): New define_insn.
+	* config/i386/i386.opt: Add option -mprefetchi.
+	* config/i386/predicates.md (local_func_symbolic_operand):
+	New predicates.
+	* config/i386/x86gprintrin.h: Include prfchiintrin.h.
+	* config/i386/xmmintrin.h (enum _mm_hint): New enum for
+	prefetchi.
+	(_mm_prefetch): Handle the highest bit of enum.
+	* doc/extend.texi: Document prefetchi.
+	* doc/invoke.texi: Document -mprefetchi.
+	* doc/sourcebuild.texi: Document target prefetchi.
+	* config/i386/prfchiintrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mprefetchi.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/avx-1.c: Ditto.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/sse-13.c: Add -mprefetchi.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* gcc.target/i386/x86gprintrin-1.c: Ditto.
+	* gcc.target/i386/x86gprintrin-2.c: Ditto.
+	* gcc.target/i386/x86gprintrin-3.c: Ditto.
+	* gcc.target/i386/x86gprintrin-4.c: Ditto.
+	* gcc.target/i386/x86gprintrin-5.c: Ditto.
+	* gcc.target/i386/prefetchi-1.c: New test.
+	* gcc.target/i386/prefetchi-2.c: Ditto.
+	* gcc.target/i386/prefetchi-3.c: Ditto.
+	* gcc.target/i386/prefetchi-4.c: Ditto.
+
+Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
+---
+ gcc/common/config/i386/cpuinfo.h              |  2 +
+ gcc/common/config/i386/i386-common.cc         | 15 ++++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  1 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-builtin-types.def        |  4 +
+ gcc/config/i386/i386-builtin.def              |  4 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-expand.cc                | 77 +++++++++++++++++++
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.md                       | 23 ++++++
+ gcc/config/i386/i386.opt                      |  4 +
+ gcc/config/i386/predicates.md                 | 15 ++++
+ gcc/config/i386/prfchiintrin.h                | 49 ++++++++++++
+ gcc/config/i386/x86gprintrin.h                |  2 +
+ gcc/config/i386/xmmintrin.h                   |  7 +-
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  7 +-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/avx-1.c         |  4 +-
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 40 ++++++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 +++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 20 +++++
+ gcc/testsuite/gcc.target/i386/prefetchi-4.c   | 19 +++++
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  4 +-
+ .../gcc.target/i386/x86gprintrin-1.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-2.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-3.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-4.c          |  2 +-
+ .../gcc.target/i386/x86gprintrin-5.c          |  2 +-
+ 36 files changed, 343 insertions(+), 19 deletions(-)
+ create mode 100644 gcc/config/i386/prfchiintrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-4.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 5951a30aa..f17e88144 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -772,6 +772,8 @@ get_available_features (struct __processor_model *cpu_model,
+       __cpuid_count (7, 1, eax, ebx, ecx, edx);
+       if (eax & bit_HRESET)
+ 	set_feature (FEATURE_HRESET);
++      if (edx & bit_PREFETCHI)
++	set_feature (FEATURE_PREFETCHI);
+       if (avx_usable)
+ 	{
+ 	  if (eax & bit_AVXVNNI)
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 922db33ee..c8cf532cf 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
++#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -277,6 +278,7 @@ along with GCC; see the file COPYING3.  If not see
+   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+ #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
++#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1140,6 +1142,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mprefetchi:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 8f22897de..95b078acf 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -241,6 +241,7 @@ enum processor_features
+   FEATURE_X86_64_V3,
+   FEATURE_X86_64_V4,
+   FEATURE_AMX_FP16,
++  FEATURE_PREFETCHI,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 95bab6da2..6caf06249 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -176,4 +176,5 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
++  ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e2b4a23dc..81012c651 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+ 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+-		       amxfp16intrin.h"
++		       amxfp16intrin.h prfchiintrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
+index d6cd8d1bf..21100149a 100644
+--- a/gcc/config/i386/cpuid.h
++++ b/gcc/config/i386/cpuid.h
+@@ -50,6 +50,7 @@
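Note: the new intrinsics prefetch code rather than data; prefetchit0/t1 only accept RIP-relative addresses, so the typical operand is a function symbol. A sketch assuming the _m_prefetchit0/_m_prefetchit1 names from the new prfchiintrin.h (hot_fn is an arbitrary example; build with -mprefetchi):

  #include <x86gprintrin.h>

  extern void hot_fn (void);

  void
  warm_icache (void)
  {
    _m_prefetchit0 ((void *) hot_fn);   /* prefetch code, T0 hint */
    _m_prefetchit1 ((void *) hot_fn);   /* prefetch code, T1 hint */
  }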
_service:tar_scm:0274-Initial-Granite-Rapids-Support.patch
Added
@@ -0,0 +1,277 @@
+From 7f0f8b585cf60b4c09bca42b5339995c2cc74633 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Mon, 7 Nov 2022 11:04:57 +0800
+Subject: [PATCH 20/28] Initial Granite Rapids Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h
+	(get_intel_cpu): Handle Granite Rapids.
+	* common/config/i386/i386-common.cc:
+	(processor_names): Add graniterapids.
+	(processor_alias_table): Ditto.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS.
+	* config.gcc: Add -march=graniterapids.
+	* config/i386/driver-i386.cc (host_detect_local_cpu):
+	Handle graniterapids.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Ditto.
+	* config/i386/i386-options.cc (m_GRANITERAPIDS): New.
+	(processor_cost_table): Add graniterapids.
+	* config/i386/i386.h (enum processor_type):
+	Add PROCESSOR_GRANITERAPIDS.
+	(PTA_GRANITERAPIDS): Ditto.
+	* doc/extend.texi: Add graniterapids.
+	* doc/invoke.texi: Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/mv16.C: Add graniterapids.
+	* gcc.target/i386/funcspec-56.inc: Handle new march.
+
+(cherry picked from commit 339ffc5a792dd66647392a235f2f7f6344c5359e)
+---
+ gcc/common/config/i386/cpuinfo.h              |  9 +++++++++
+ gcc/common/config/i386/i386-common.cc         |  3 +++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/driver-i386.cc                |  5 ++++-
+ gcc/config/i386/i386-c.cc                     |  7 +++++++
+ gcc/config/i386/i386-options.cc               |  4 +++-
+ gcc/config/i386/i386.h                        |  3 +++
+ gcc/doc/extend.texi                           |  3 +++
+ gcc/doc/invoke.texi                           | 11 +++++++++++
+ gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
+ 12 files changed, 52 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index f17e88144..1f75ff1ca 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -528,6 +528,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_type = INTEL_COREI7;
+       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+       break;
++    case 0xad:
++    case 0xae:
++      /* Granite Rapids.  */
++      cpu = "graniterapids";
++      CHECK___builtin_cpu_is ("corei7");
++      CHECK___builtin_cpu_is ("graniterapids");
++      cpu_model->__cpu_type = INTEL_COREI7;
++      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
++      break;
+     case 0x17:
+     case 0x1d:
+       /* Penryn.  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c8cf532cf..1aa163463 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1855,6 +1855,7 @@ const char *const processor_names[] =
+   "sapphirerapids",
+   "alderlake",
+   "rocketlake",
++  "graniterapids",
+   "intel",
+   "geode",
+   "k6",
+@@ -1973,6 +1974,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
++    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 95b078acf..7b2d4d242 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -92,6 +92,7 @@ enum processor_subtypes
+   AMDFAM19H_ZNVER3,
+   INTEL_COREI7_ROCKETLAKE,
+   AMDFAM19H_ZNVER4,
++  INTEL_COREI7_GRANITERAPIDS,
+   CPU_SUBTYPE_MAX
+ };
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 81012c651..9bad238e3 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -670,7 +670,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
+ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
+ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
+ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
+-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
+
+ # Additional x86 processors supported by --with-cpu=.  Each processor
+ # MUST be separated by exactly one space.
+diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
+index 3b5161aed..ea8c3d8d1 100644
+--- a/gcc/config/i386/driver-i386.cc
++++ b/gcc/config/i386/driver-i386.cc
+@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
+ 	  /* This is unknown family 0x6 CPU.  */
+ 	  if (has_feature (FEATURE_AVX))
+ 	    {
++	      /* Assume Granite Rapids.  */
++	      if (has_feature (FEATURE_AMX_FP16))
++		cpu = "graniterapids";
+ 	      /* Assume Tiger Lake */
+-	      if (has_feature (FEATURE_AVX512VP2INTERSECT))
++	      else if (has_feature (FEATURE_AVX512VP2INTERSECT))
+ 		cpu = "tigerlake";
+ 	      /* Assume Sapphire Rapids.  */
+ 	      else if (has_feature (FEATURE_TSXLDTRK))
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
+index 00880bd17..04f1dd682 100644
+--- a/gcc/config/i386/i386-c.cc
++++ b/gcc/config/i386/i386-c.cc
+@@ -242,6 +242,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	  def_or_undef (parse_in, "__sapphirerapids");
+ 	  def_or_undef (parse_in, "__sapphirerapids__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__graniterapids");
++	  def_or_undef (parse_in, "__graniterapids__");
++	  break;
+ 	case PROCESSOR_ALDERLAKE:
+ 	  def_or_undef (parse_in, "__alderlake");
+ 	  def_or_undef (parse_in, "__alderlake__");
+@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	case PROCESSOR_ROCKETLAKE:
+ 	  def_or_undef (parse_in, "__tune_rocketlake__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__tune_graniterapids__");
++	  break;
+ 	case PROCESSOR_INTEL:
+ 	case PROCESSOR_GENERIC:
+ 	  break;
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 724375f02..6645e3259 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -127,10 +127,11 @@ along with GCC; see the file COPYING3.  If not see
+ #define m_SAPPHIRERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_SAPPHIRERAPIDS)
+ #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
+ #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
++#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
+ #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
+-		       | m_ROCKETLAKE)
++		       | m_ROCKETLAKE | m_GRANITERAPIDS)
+ #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
+ #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
+ #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
+@@ -761,6 +762,7 @@ static const struct processor_costs *processor_cost_table[] =
+   &icelake_cost,
+   &alderlake_cost,
+   &icelake_cost,
++  &icelake_cost,
+   &intel_cost,
+   &geode_cost,
+   &k6_cost,
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index aaa136ba0..75953defc 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2250,6 +2250,7 @@ enum processor_type
+   PROCESSOR_SAPPHIRERAPIDS,
+   PROCESSOR_ALDERLAKE,
+   PROCESSOR_ROCKETLAKE,
++  PROCESSOR_GRANITERAPIDS,
+   PROCESSOR_INTEL,
+   PROCESSOR_GEODE,
+   PROCESSOR_K6,
+@@ -2356,6 +2357,8 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
++constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
++  | PTA_PREFETCHI;
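Note on -march=native after this change: host_detect_local_cpu keys its unknown-family-6 fallback for Granite Rapids off AMX-FP16. The same feature test is available to user code once the "amx-fp16" name exists in the ISA table (sketch; a heuristic, like the driver's own):

  int
  looks_like_graniterapids (void)
  {
    __builtin_cpu_init ();
    /* Mirrors the driver fallback above: AMX-FP16 suggests a Granite
       Rapids class part.  */
    return __builtin_cpu_supports ("amx-fp16");
  }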
_service:tar_scm:0275-Support-Intel-AMX-COMPLEX.patch
Added
@@ -0,0 +1,722 @@
+From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Wed, 31 May 2023 10:45:00 +0800
+Subject: [PATCH 21/28] Support Intel AMX-COMPLEX
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features):
+	Detect AMX-COMPLEX.
+	* common/config/i386/i386-common.cc
+	(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
+	OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
+	(ix86_handle_option): Handle -mamx-complex.
+	* common/config/i386/i386-cpuinfo.h (enum processor_features):
+	Add FEATURE_AMX_COMPLEX.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
+	amx-complex.
+	* config.gcc: Add amxcomplexintrin.h.
+	* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
+	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
+	__AMX_COMPLEX__.
+	* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
+	* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
+	Handle amx-complex.
+	* config/i386/i386.opt: Add option -mamx-complex.
+	* config/i386/immintrin.h: Include amxcomplexintrin.h.
+	* doc/extend.texi: Document amx-complex.
+	* doc/invoke.texi: Document -mamx-complex.
+	* doc/sourcebuild.texi: Document target amx-complex.
+	* config/i386/amxcomplexintrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mamx-complex.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
+	* gcc.target/i386/amx-helper.h: Add amx-complex support.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/sse-12.c: Add -mamx-complex.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-14.c: Ditto.
+	* gcc.target/i386/sse-22.c: Add amx-complex.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* lib/target-supports.exp (check_effective_target_amx_complex): New.
+	* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
+	* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
+	* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
+	* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
+---
+ gcc/common/config/i386/cpuinfo.h              |  2 +
+ gcc/common/config/i386/i386-common.cc         | 19 +++++-
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/common/config/i386/i386-isas.h            |  2 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/amxcomplexintrin.h            | 59 +++++++++++++++++++
+ gcc/config/i386/cpuid.h                       |  1 +
+ gcc/config/i386/i386-c.cc                     |  2 +
+ gcc/config/i386/i386-isa.def                  |  1 +
+ gcc/config/i386/i386-options.cc               |  4 +-
+ gcc/config/i386/i386.opt                      |  4 ++
+ gcc/config/i386/immintrin.h                   |  2 +
+ gcc/doc/extend.texi                           |  5 ++
+ gcc/doc/invoke.texi                           |  7 ++-
+ gcc/doc/sourcebuild.texi                      |  3 +
+ gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
+ gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
+ gcc/testsuite/gcc.target/i386/amx-check.h     |  3 +
+ gcc/testsuite/gcc.target/i386/amx-helper.h    |  4 +-
+ .../gcc.target/i386/amxcomplex-asmatt-1.c     | 15 +++++
+ .../gcc.target/i386/amxcomplex-asmintel-1.c   | 12 ++++
+ .../i386/amxcomplex-cmmimfp16ps-2.c           | 53 +++++++++++++++++
+ .../i386/amxcomplex-cmmrlfp16ps-2.c           | 53 +++++++++++++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
+ gcc/testsuite/gcc.target/i386/sse-12.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-13.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-14.c        |  2 +-
+ gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +-
+ gcc/testsuite/gcc.target/i386/sse-23.c        |  2 +-
+ gcc/testsuite/lib/target-supports.exp         | 11 ++++
+ 30 files changed, 268 insertions(+), 15 deletions(-)
+ create mode 100644 gcc/config/i386/amxcomplexintrin.h
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 1f75ff1ca..39d3351db 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model,
+ 	{
+ 	  if (eax & bit_AMX_FP16)
+ 	    set_feature (FEATURE_AMX_FP16);
++	  if (edx & bit_AMX_COMPLEX)
++	    set_feature (FEATURE_AMX_COMPLEX);
+ 	}
+     }
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 1aa163463..87e8afe9b 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+ #define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
++#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \
++  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
+    as -msse4.2.  */
+@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
+ #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
+ #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
+-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
++#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
++  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
+ #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
+ #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
+ #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
+@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3.  If not see
+ #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+ #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
+ #define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
++#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
+
+ /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
+    as -mno-sse4.1.  */
+@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+
++    case OPT_mamx_complex:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 7b2d4d242..56020faac 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -243,6 +243,7 @@ enum processor_features
+   FEATURE_X86_64_V4,
+   FEATURE_AMX_FP16,
+   FEATURE_PREFETCHI,
++  FEATURE_AMX_COMPLEX,
+   CPU_FEATURE_MAX
+ };
+
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 6caf06249..cbef68479 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START
+   ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+   ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
+   ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
++  ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX,
++			P_NONE, "-mamx-complex")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 9bad238e3..ca5c8f8a0 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+ 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+-		       amxfp16intrin.h prfchiintrin.h"
++		       amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h
+new file mode 100644
+index 000000000..6ea1eca04
+--- /dev/null
++++ b/gcc/config/i386/amxcomplexintrin.h
+@@ -0,0 +1,59 @@
++/* Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
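Note: what the two new tile instructions compute, in scalar form: each pair of FP16 lanes is one complex operand, and tcmmrlfp16ps/tcmmimfp16ps accumulate the real and imaginary parts of the products into an FP32 tile. A scalar model of one multiply-accumulate step (illustrative name, not from the patch):

  /* Scalar model of one AMX-COMPLEX multiply-accumulate lane.  */
  static void
  cmma_ref (float a_re, float a_im, float b_re, float b_im,
            float *acc_re, float *acc_im)
  {
    *acc_re += a_re * b_re - a_im * b_im;   /* tcmmrlfp16ps: real part */
    *acc_im += a_re * b_im + a_im * b_re;   /* tcmmimfp16ps: imaginary part */
  }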
_service:tar_scm:0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
Added
@@ -0,0 +1,30 @@
+From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 31 Mar 2023 10:49:14 +0800
+Subject: [PATCH 22/28] i386: Add AMX-COMPLEX to Granite Rapids
+
+gcc/ChangeLog:
+
+	* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
+
+(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
+---
+ gcc/config/i386/i386.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 75953defc..56d7794dc 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
+ constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
+-  | PTA_PREFETCHI;
++  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
+ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+ constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+--
+2.31.1
+
_service:tar_scm:0277-Initial-Granite-Rapids-D-Support.patch
Added
@@ -0,0 +1,212 @@
+From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
+From: "Mo, Zewei" <zewei.mo@intel.com>
+Date: Mon, 3 Jul 2023 11:00:26 +0800
+Subject: [PATCH 23/28] Initial Granite Rapids D Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h
+	(get_intel_cpu): Handle Granite Rapids D.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add graniterapids-d.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
+	* config.gcc: Add -march=graniterapids-d.
+	* config/i386/driver-i386.cc (host_detect_local_cpu):
+	Handle graniterapids-d.
+	* config/i386/i386.h: (PTA_GRANITERAPIDS_D): New.
+	* doc/extend.texi: Add graniterapids-d.
+	* doc/invoke.texi: Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/mv16.C: Add graniterapids-d.
+	* gcc.target/i386/funcspec-56.inc: Handle new march.
+
+(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
+---
+ gcc/common/config/i386/cpuinfo.h              |  9 ++++++++-
+ gcc/common/config/i386/i386-common.cc         |  2 ++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/config.gcc                                |  3 ++-
+ gcc/config/i386/driver-i386.cc                |  5 ++++-
+ gcc/config/i386/i386.h                        |  4 +++-
+ gcc/doc/extend.texi                           |  3 +++
+ gcc/doc/invoke.texi                           | 11 +++++++++++
+ gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
+ 10 files changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 39d3351db..1e53248ef 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+       break;
+     case 0xad:
+-    case 0xae:
+       /* Granite Rapids.  */
+       cpu = "graniterapids";
+       CHECK___builtin_cpu_is ("corei7");
+@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_type = INTEL_COREI7;
+       cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
+       break;
++    case 0xae:
++      /* Granite Rapids D.  */
++      cpu = "graniterapids-d";
++      CHECK___builtin_cpu_is ("corei7");
++      CHECK___builtin_cpu_is ("graniterapids-d");
++      cpu_model->__cpu_type = INTEL_COREI7;
++      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
++      break;
+     case 0x17:
+     case 0x1d:
+       /* Penryn.  */
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 87e8afe9b..28f468f48 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
+     M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
++  {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
++    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 56020faac..a32f32c97 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -93,6 +93,7 @@ enum processor_subtypes
+   INTEL_COREI7_ROCKETLAKE,
+   AMDFAM19H_ZNVER4,
+   INTEL_COREI7_GRANITERAPIDS,
++  INTEL_COREI7_GRANITERAPIDS_D,
+   CPU_SUBTYPE_MAX
+ };
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index ca5c8f8a0..3108ac4eb 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
+ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
+ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
+ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
+-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
++graniterapids-d native"
+
+ # Additional x86 processors supported by --with-cpu=.  Each processor
+ # MUST be separated by exactly one space.
+diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
+index ea8c3d8d1..e3bca4b49 100644
+--- a/gcc/config/i386/driver-i386.cc
++++ b/gcc/config/i386/driver-i386.cc
+@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
+ 	  /* This is unknown family 0x6 CPU.  */
+ 	  if (has_feature (FEATURE_AVX))
+ 	    {
++	      /* Assume Granite Rapids D.  */
++	      if (has_feature (FEATURE_AMX_COMPLEX))
++		cpu = "graniterapids-d";
+ 	      /* Assume Granite Rapids.  */
+-	      if (has_feature (FEATURE_AMX_FP16))
++	      else if (has_feature (FEATURE_AMX_FP16))
+ 		cpu = "graniterapids";
+ 	      /* Assume Tiger Lake */
+ 	      else if (has_feature (FEATURE_AVX512VP2INTERSECT))
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 56d7794dc..eda3e5e5b 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
+ constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
+-  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
++  | PTA_PREFETCHI;
++constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
++  | PTA_AMX_COMPLEX;
+ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+ constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index d7b0bc802..674db2f1a 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
+ @item graniterapids
+ Intel Core i7 graniterapids CPU.
+
++@item graniterapids-d
++Intel Core i7 graniterapids D CPU.
++
+ @item bonnell
+ Intel Atom Bonnell CPU.
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 186b33481..a2ec060fd 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+ SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+ AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
+
++@item graniterapids-d
++Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
++AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
++VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
++MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
++AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
++
+ @item k6
+ AMD K6 CPU with MMX instruction set support.
+
+diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
+index 65cc24f32..17b1fc722 100644
+--- a/gcc/testsuite/g++.target/i386/mv16.C
++++ b/gcc/testsuite/g++.target/i386/mv16.C
+@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
+   return 26;
+ }
+
++int __attribute__ ((target("arch=graniterapids-d"))) foo () {
++  return 28;
++}
++
+ int main ()
+ {
+   int val = foo ();
+@@ -136,6 +140,8 @@ int main ()
+     assert (val == 24);
+   else if (__builtin_cpu_is ("graniterapids"))
+     assert (val == 25);
++  else if (__builtin_cpu_is ("graniterapids-d"))
++    assert (val == 26);
+   else
+     assert (val == 0);
+
+diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+index 1a2f3b83d..f0f3397a7 100644
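Note: with the new subtype in place, user code can distinguish the D variant at run time (sketch; requires a compiler with this patch applied):

  int
  main (void)
  {
    __builtin_cpu_init ();
    if (__builtin_cpu_is ("graniterapids-d"))
      __builtin_printf ("Granite Rapids D\n");
    else if (__builtin_cpu_is ("graniterapids"))
      __builtin_printf ("Granite Rapids\n");
    return 0;
  }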
View file
_service:tar_scm:0278-Correct-Granite-Rapids-D-documentation.patch
Added
@@ -0,0 +1,48 @@
+From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Thu, 20 Jul 2023 10:47:18 +0800
+Subject: [PATCH 24/28] Correct Granite Rapids{, D} documentation
+
+gcc/ChangeLog:
+
+ * doc/invoke.texi: Remove AVX512VP2INTERSECT in
+ Granite Rapids{, D} from documentation.
+
+(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
+---
+ gcc/doc/invoke.texi | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index a2ec060fd..4d3eccdb2 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
+ AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
+ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
+ VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
+-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
++and PREFETCHI instruction set support.
+
+ @item graniterapids-d
+ Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
+@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
+ AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
+ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
+ VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
+-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
++PREFETCHI and AMX-COMPLEX instruction set support.
+
+ @item k6
+ AMD K6 CPU with MMX instruction set support.
+--
+2.31.1
+
View file
_service:tar_scm:0279-i386-Remove-Meteorlake-s-family_model.patch
Added
@@ -0,0 +1,30 @@
+From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001
+From: "Hu, Lin1" <lin1.hu@intel.com>
+Date: Thu, 15 Dec 2022 15:51:18 +0800
+Subject: [PATCH 25/28] i386: Remove Meteorlake's family_model
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
+ for meteorlake.
+
+(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 1e53248ef..348bc0c12 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ /* Alder Lake. */
+ case 0xb7:
+ /* Raptor Lake. */
+- case 0xb5:
+ case 0xaa:
+ case 0xac:
+ /* Meteor Lake. */
+--
+2.31.1
+
View file
_service:tar_scm:0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
Added
@@ -0,0 +1,33 @@
+From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Thu, 29 Jun 2023 03:10:35 +0000
+Subject: [PATCH 26/28] x86: Update model values for Alderlake, Rocketlake and
+ Raptorlake.
+
+Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
+ from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
+
+(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 348bc0c12..f9bcb6fad 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ case 0x9a:
+ /* Alder Lake. */
+ case 0xb7:
++ case 0xbf:
+ /* Raptor Lake. */
+ case 0xaa:
+ case 0xac:
+--
+2.31.1
+
View file
_service:tar_scm:0281-x86-Update-model-values-for-Raptorlake.patch
Added
@@ -0,0 +1,32 @@
+From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001
+From: "Cui, Lili" <lili.cui@intel.com>
+Date: Mon, 14 Aug 2023 02:06:00 +0000
+Subject: [PATCH 27/28] x86: Update model values for Raptorlake.
+
+Update model values for Raptorlake according to SDM.
+
+gcc/ChangeLog:
+
+ * common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
+ to Raptorlake.
+
+(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316)
+---
+ gcc/common/config/i386/cpuinfo.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index f9bcb6fad..da1568fd1 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
+ case 0x9a:
+ /* Alder Lake. */
+ case 0xb7:
++ case 0xba:
+ case 0xbf:
+ /* Raptor Lake. */
+ case 0xaa:
+--
+2.31.1
+
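Taken together, patches 0279-0281 align get_intel_cpu with the SDM: model 0xb5 is dropped, and models 0xba/0xbf are classified as Raptor Lake, which shares its __cpu_type/__cpu_subtype with Alder Lake. A hedged C sketch (illustrative, not from the testsuite) of the user-visible effect:

#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();
  /* Raptor Lake and Meteor Lake report the Alder Lake subtype, so this
     is the check that succeeds on family-6 models 0xb7/0xba/0xbf.  */
  if (__builtin_cpu_is ("alderlake"))
    puts ("Alder Lake compatible core detected");
  return 0;
}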
View file
_service:tar_scm:0282-Fix-target_clone-arch-graniterapids-d.patch
Added
@@ -0,0 +1,159 @@
+From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Tue, 22 Aug 2023 18:18:31 +0800
+Subject: [PATCH 28/28] Fix target_clone ("arch=graniterapids-d")
+
+Both "graniterapids-d" and "graniterapids" are attached to
+PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
+different __cpu_subtype in get_intel_cpu.
+
+And get_builtin_code_for_version will try to match the first
+PROCESSOR_GRANITERAPIDS in processor_alias_table, which maps to
+"graniterapids" here.
+
+1861 else if (new_target->arch_specified && new_target->arch > 0)
+1862   for (i = 0; i < pta_size; i++)
+1863     if (processor_alias_table[i].processor == new_target->arch)
+1864       {
+1865         const pta *arch_info = &processor_alias_table[i];
+1866         switch (arch_info->priority)
+1867           {
+1868           default:
+1869             arg_str = arch_info->name;
+
+This mismatch makes dispatch_function_versions check the predicate
+of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
+the issue.
+The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
+
+"alderlake", "raptorlake" and "meteorlake" share the same isa, cost and
+tuning, and are mapped to the same __cpu_type/__cpu_subtype in
+get_intel_cpu, so there is no need to add PROCESSOR_RAPTORLAKE and others.
+
+gcc/ChangeLog:
+
+ * common/config/i386/i386-common.cc (processor_names): Add new
+ member graniterapids-d.
+ * config/i386/i386-options.cc (processor_alias_table): Update
+ table with PROCESSOR_GRANITERAPIDS_D.
+ (m_GRANITERAPIDS_D): New macro.
+ (m_CORE_AVX512): Add m_GRANITERAPIDS_D.
+ (processor_cost_table): Add icelake_cost for
+ PROCESSOR_GRANITERAPIDS_D.
+ * config/i386/i386.h (enum processor_type): Add new member
+ PROCESSOR_GRANITERAPIDS_D.
+ * config/i386/i386-c.cc (ix86_target_macros_internal): Handle
+ PROCESSOR_GRANITERAPIDS_D.
+---
+ gcc/common/config/i386/i386-common.cc | 6 ++++--
+ gcc/config/i386/i386-c.cc | 8 ++++++++
+ gcc/config/i386/i386-options.cc | 4 +++-
+ gcc/config/i386/i386.h | 3 ++-
+ 4 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index 28f468f48..bec6801ce 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1873,6 +1873,7 @@ const char *const processor_names[] =
+ "alderlake",
+ "rocketlake",
+ "graniterapids",
++ "graniterapids-d",
+ "intel",
+ "geode",
+ "k6",
+@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] =
+ M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+ {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
+- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
++ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
++ P_PROC_AVX512F},
+ {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+ M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+ {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
+index 5e0ac278c..49f0db2b8 100644
+--- a/gcc/config/i386/i386-c.cc
++++ b/gcc/config/i386/i386-c.cc
+@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ def_or_undef (parse_in, "__graniterapids");
+ def_or_undef (parse_in, "__graniterapids__");
+ break;
++ case PROCESSOR_GRANITERAPIDS_D:
++ def_or_undef (parse_in, "__graniterapids_d");
++ def_or_undef (parse_in, "__graniterapids_d__");
++ break;
+ case PROCESSOR_ALDERLAKE:
+ def_or_undef (parse_in, "__alderlake");
+ def_or_undef (parse_in, "__alderlake__");
+@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ def_or_undef (parse_in, "__rocketlake");
+ def_or_undef (parse_in, "__rocketlake__");
+ break;
++
+ /* use PROCESSOR_max to not set/unset the arch macro. */
+ case PROCESSOR_max:
+ break;
+@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ case PROCESSOR_GRANITERAPIDS:
+ def_or_undef (parse_in, "__tune_graniterapids__");
+ break;
++ case PROCESSOR_GRANITERAPIDS_D:
++ def_or_undef (parse_in, "__tune_graniterapids_d__");
++ break;
+ case PROCESSOR_INTEL:
+ case PROCESSOR_GENERIC:
+ break;
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 7efd25084..86932d719 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see
+ #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
+ #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
+ #define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
++#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
+ #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
+- | m_ROCKETLAKE | m_GRANITERAPIDS)
++ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
+ #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
+ #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
+ #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
+@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] =
+ &alderlake_cost,
+ &icelake_cost,
+ &icelake_cost,
++ &icelake_cost,
+ &intel_cost,
+ &geode_cost,
+ &k6_cost,
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index eda3e5e5b..5052f878d 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
+ #define DEFAULT_LARGE_SECTION_THRESHOLD 65536
+ 
+ /* Which processor to tune code generation for. These must be in sync
+- with processor_target_table in i386.cc. */
++ with processor_cost_table in i386-options.cc. */
+
+ enum processor_type
+ {
+@@ -2251,6 +2251,7 @@ enum processor_type
+ PROCESSOR_ALDERLAKE,
+ PROCESSOR_ROCKETLAKE,
+ PROCESSOR_GRANITERAPIDS,
++ PROCESSOR_GRANITERAPIDS_D,
+ PROCESSOR_INTEL,
+ PROCESSOR_GEODE,
+ PROCESSOR_K6,
+--
+2.31.1
+
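A short C sketch of the scenario this patch fixes (illustrative; the function name is made up): with PROCESSOR_GRANITERAPIDS_D distinct, the generated resolver tests __builtin_cpu_is ("graniterapids-d") for the second clone instead of matching the plain "graniterapids" entry twice.

__attribute__ ((target_clones ("arch=graniterapids",
                               "arch=graniterapids-d", "default")))
int
compute (int x)
{
  /* Dispatched per-CPU at load time via the generated ifunc resolver.  */
  return x * 2;
}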
View file
_service:tar_scm:0283-i386-Change-prefetchi-output-template.patch
Added
@@ -0,0 +1,62 @@
+From 59e07c6c77dcc92d274ca6156b8916f66035dce8 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Mon, 22 Jul 2024 14:06:18 +0800
+Subject: [PATCH 1/2] i386: Change prefetchi output template
+
+For prefetchi instructions, a RIP-relative address is explicitly required
+for the operand, and the assembler obeys that rule strictly. This makes an
+instruction like:
+
+ prefetchit0 bar
+
+illegal for the assembler, although it should be a common usage of
+prefetchi.
+
+Change the output template to %a to explicitly add (%rip) after the
+function label, making it legal for the assembler so that the linker can
+resolve the real address.
+
+gcc/ChangeLog:
+
+ * config/i386/i386.md (prefetchi): Change to %a.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/prefetchi-1.c: Check (%rip).
+
+Reference:
+https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=
+062e46a813799684c6f900815fd22451d6294ae1
+---
+ gcc/config/i386/i386.md | 2 +-
+ gcc/testsuite/gcc.target/i386/prefetchi-1.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
+index f08c2cfb1..1b733008e 100644
+--- a/gcc/config/i386/i386.md
++++ b/gcc/config/i386/i386.md
+@@ -22917,7 +22917,7 @@
+ "TARGET_PREFETCHI && TARGET_64BIT"
+ {
+ static const char * const patterns[2] = {
+- "prefetchit1\t%0", "prefetchit0\t%0"
++ "prefetchit1\t%a0", "prefetchit0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1.c b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
+index 80f25e70e..03dfdc55e 100644
+--- a/gcc/testsuite/gcc.target/i386/prefetchi-1.c
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
+@@ -1,7 +1,7 @@
+ /* { dg-do compile { target { ! ia32 } } } */
+ /* { dg-options "-mprefetchi -O2" } */
+-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */
+-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+" 2 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 2 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 2 } } */
+
+ #include <x86intrin.h>
+
+--
+2.31.1
+
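Usage sketch (it mirrors the prefetchi-1b.c test added in the next patch): with the %a template, the compiler emits "prefetchit0 bar(%rip)", which the assembler accepts. Requires -mprefetchi on a 64-bit x86 target.

#include <x86intrin.h>

int bar (int a) { return a + 1; }

int
foo (int b)
{
  _m_prefetchit0 (bar);  /* code prefetch of bar with a T0 locality hint */
  return bar (b) + 1;
}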
View file
_service:tar_scm:0284-i386-Add-non-optimize-prefetchi-intrins.patch
Added
@@ -0,0 +1,92 @@
+From c19afda0ee549d294fd5714c63db24bcd4570d03 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Thu, 25 Jul 2024 16:16:05 +0800
+Subject: [PATCH 2/2] i386: Add non-optimize prefetchi intrins
+
+Under -O0, with the "newly" introduced intrins, the variable will be
+transformed to a mem instead of the original symbol_ref. The compiler will
+then treat the operand as invalid and turn the operation into a nop, which
+is not expected. Use a macro for the non-optimized case to keep the
+variable a symbol_ref, just as the prefetch intrinsic does.
+
+gcc/ChangeLog:
+
+ * config/i386/prfchiintrin.h
+ (_m_prefetchit0): Add macro for non-optimized option.
+ (_m_prefetchit1): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/i386/prefetchi-1b.c: New test.
+
+Reference:
+https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=
+b4524c4430ba9771265bd9fc31e69a3f35dfe117
+---
+ gcc/config/i386/prfchiintrin.h | 9 +++++++
+ gcc/testsuite/gcc.target/i386/prefetchi-1b.c | 26 ++++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+
+diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h
+index 06deef488..1e3d42dc3 100644
+--- a/gcc/config/i386/prfchiintrin.h
++++ b/gcc/config/i386/prfchiintrin.h
+@@ -30,6 +30,7 @@
+
+ #ifdef __x86_64__
+
++#ifdef __OPTIMIZE__
+ extern __inline void
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ _m_prefetchit0 (void* __P)
+@@ -43,6 +44,14 @@ _m_prefetchit1 (void* __P)
+ {
+ __builtin_ia32_prefetchi (__P, 2);
+ }
++#else
++#define _m_prefetchit0(P) \
++ __builtin_ia32_prefetchi(P, 3)
++
++#define _m_prefetchit1(P) \
++ __builtin_ia32_prefetchi(P, 2)
++
++#endif
+
+ #endif
+
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1b.c b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+new file mode 100644
+index 000000000..93139554d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-mprefetchi -O0" } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 1 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 1 } } */
++
++#include <x86intrin.h>
++
++int
++bar (int a)
++{
++ return a + 1;
++}
++
++int
++foo1 (int b)
++{
++ _m_prefetchit0 (bar);
++ return bar (b) + 1;
++}
++
++int
++foo2 (int b)
++{
++ _m_prefetchit1 (bar);
++ return bar (b) + 1;
++}
+--
+2.31.1
+
View file
_service:tar_scm:0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch
Added
@@ -0,0 +1,32 @@
+From 239f0637307ff2f6afb1473e99d0bb0eaf8946b2 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Fri, 23 Aug 2024 15:37:17 +0800
+Subject: [PATCH 154/157] [SME] Recover hip09 and hip11 in aarch64-cores.def
+
+---
+ gcc/config/aarch64/aarch64-cores.def | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index f069c81cf..3337fd1a0 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
+
+ /* HiSilicon ('H') cores. */
+ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
++AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0)
+
+ /* ARMv8.3-A Architecture Processors. */
+
+@@ -171,6 +172,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG,
+ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
+
+ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1)
++AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22, -1)
+
+ AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+ AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+--
+2.33.0
+
View file
_service:tar_scm:0286-Try-to-use-AI-model-to-guide-optimization.patch
Added
@@ -0,0 +1,671 @@ +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index fcfa54697..f42aeb8e8 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1449,6 +1449,7 @@ OBJS = \ + inchash.o \ + incpath.o \ + init-regs.o \ ++ ipa-hardware-detection.o \ + internal-fn.o \ + ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index fd98382fa..99e626641 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -185,6 +185,9 @@ const char *main_input_basename + Variable + int main_input_baselength + ++Variable ++bool optimize_maximum ++ + ; The base name used for auxiliary output files. + ; dump_base_name minus dump_base_ext. + +@@ -469,6 +472,10 @@ Ofast + Common Optimization + Optimize for speed disregarding exact standards compliance. + ++Om ++Common Optimization ++Optimize for maximizing radical optimization. ++ + Og + Common Optimization + Optimize for debugging experience rather than speed or size. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 309ecc3d9..ad853af9a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18637,6 +18637,134 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, + return stmt_cost; + } + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ ++static void ++override_C_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_ipa_reorder_fields = 1; ++ opts->x_flag_ipa_struct_reorg = 6; ++ opts->x_struct_layout_optimize_level = 6; ++ opts->x_flag_gnu89_inline = 1; ++ opts->x_flag_ccmp2 = 1; ++ opts->x_flag_array_widen_compare = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_flag_tree_slp_transpose_vectorize = 1; ++ opts->x_param_max_inline_insns_auto = 64; ++ opts->x_param_inline_unit_growth = 96; ++} ++ ++/* Check whether in CPP language or LTO with only CPP language. 
*/ ++static bool ++lang_cpp_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ if (!language_string) ++ { ++ return false; ++ } ++ if (lang_GNU_CXX ()) ++ { ++ return true; ++ } ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (lang_hooks.name, "GNU C++", 7)) ++ { ++ return false; ++ } ++ } ++ return true; ++ } ++ return false; ++} ++ ++static void ++override_CPP_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_finite_loops = 1; ++ opts->x_flag_omit_frame_pointer = 1; ++ opts->x_flag_sized_deallocation = 0; ++ opts->x_flag_loop_elim = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_param_early_inlining_insns = 256; ++ opts->x_param_max_inline_insns_auto = 128; ++ opts->x_param_inline_unit_growth = 256; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++static void ++override_optimize_options_1 (struct gcc_options *opts) ++{ ++ opts->x_flag_split_ldp_stp = 1; ++ opts->x_flag_if_conversion_gimple = 1; ++ opts->x_flag_ifcvt_allow_complicated_cmps = 1; ++ opts->x_param_ifcvt_allow_register_renaming = 2; ++ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; ++ opts->x_param_max_rtl_if_conversion_predictable_cost = 48; ++} ++ ++static void ++override_Fortran_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_unroll_loops = 1; ++ opts->x_flag_unconstrained_commons = 1; ++ opts->x_param_ipa_cp_eval_threshold = 1; ++ opts->x_param_ipa_cp_unit_growth = 80; ++ opts->x_param_ipa_cp_max_recursive_depth = 8; ++ opts->x_param_large_unit_insns = 30000; ++ opts->x_flag_ira_loop_pressure = 1; ++ opts->x_flag_inline_functions_called_once = 0; ++ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; ++ opts->x_flag_delayed_branch = 1; ++ opts->x_flag_gcse_las = 1; ++ opts->x_flag_gcse_sm = 1; ++ opts->x_flag_ipa_pta = 1; ++ opts->x_flag_reorder_blocks_and_partition = 1; ++ opts->x_flag_reorder_blocks = 1; ++ opts->x_flag_crypto_accel_aes = 1; ++ opts->x_param_flexible_seg_len = 1; ++} ++ ++/* Reset the optimize option. ++ After checking the model result, this function can ++ reset the more appropriate options. */ ++static void ++reset_machine_option (struct gcc_options *opts) ++{ ++ if (!(opts->x_optimize_maximum) ++ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL) ++ { ++ return; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ override_optimize_options_1 (opts); ++ if (lang_c_p ()) ++ { ++ override_C_optimize_options (opts); ++ } ++ else if (lang_cpp_p ()) ++ { ++ override_CPP_optimize_options (opts); ++ } ++ else if (lang_GNU_Fortran ()) ++ { ++ override_Fortran_optimize_options (opts); ++ } ++ } ++} ++ ++ + /* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND + and which when vectorized would operate on vector type VECTYPE. Add the + cost of any embedded operations. 
*/ +@@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts) + && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) + opts->x_flag_prefetch_loop_arrays = 1; + ++ reset_machine_option (opts); + aarch64_override_options_after_change_1 (opts); + } + +diff --git a/gcc/ipa-hardware-detection.cc b/gcc/ipa-hardware-detection.cc +new file mode 100644 +index 000000000..8085a8c65 +--- /dev/null ++++ b/gcc/ipa-hardware-detection.cc +@@ -0,0 +1,243 @@ ++/* Hardware Detection. ++ Copyright (C) 2024-2024 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
View file
_service:tar_scm:0287-Add-dynamic-memory-access-checks.patch
Added
@@ -0,0 +1,774 @@
+From 08fb60d0a0707af4004b20358f4a921e4ae6cca6 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Thu, 22 Aug 2024 15:23:36 +0800
+Subject: [PATCH 156/157] Add dynamic memory access checks
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 622 +++++++++++++++++++++++++++++++++++++-------
+ gcc/params.opt | 4 +
+ 2 files changed, 525 insertions(+), 101 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index 94290ea9c..b000d4d75 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -368,6 +368,7 @@ typedef std::map<memref_t *, tree> memref_tree_map;
+ typedef std::set<gimple *> stmt_set;
+ typedef std::set<tree> tree_set;
+ typedef std::map<tree, tree> tree_map;
++typedef std::map<tree, poly_offset_int> tree_poly_offset_map;
+
+ tree_memref_map *tm_map;
+ funct_mrs_map *fmrs_map;
+@@ -710,6 +711,20 @@ get_mem_ref_address_ssa_name (tree mem, tree base)
+ return NULL_TREE;
+ }
+
++static void
++dump_base_addr (tree base_addr)
++{
++ if (base_addr)
++ {
++ fprintf (dump_file, "Base addr (%s): ",
++ get_tree_code_name (TREE_CODE (base_addr)));
++ print_generic_expr (dump_file, base_addr);
++ }
++ else
++ fprintf (dump_file, "Base addr (%s): ", "null");
++ fprintf (dump_file, "\n");
++}
++
+ static void
+ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr)
+ {
+@@ -736,14 +751,7 @@ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr)
+ {
+ tree base_addr = get_mem_ref_address_ssa_name (mem, base);
+ if (dump_file)
+- {
+- fprintf (dump_file, "Base addr (%s): ",
+- base_addr ? get_tree_code_name (TREE_CODE (base_addr))
+- : "null");
+- if (base_addr)
+- print_generic_expr (dump_file, base_addr);
+- fprintf (dump_file, "\n");
+- }
++ dump_base_addr (base_addr);
+ if (base_addr)
+ {
+ mr->base = analyse_addr_eval (base_addr, mr);
+@@ -1187,7 +1195,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
+ }
+
+ static void
+-find_nearest_common_dominator (memref_t *mr, basic_block &dom)
++find_nearest_common_post_dominator (memref_t *mr, basic_block &dom)
+ {
+ for (unsigned int i = 0; i < mr->stmts.length (); i++)
+ {
+@@ -1196,7 +1204,7 @@ find_nearest_common_dominator (memref_t *mr, basic_block &dom)
+ if (dom == bb)
+ continue;
+ if (dom)
+- dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb);
++ dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, bb);
+ else
+ dom = bb;
+ }
+@@ -1495,10 +1503,13 @@ gimple_copy_and_remap (gimple *stmt)
+
+ static gimple *
+ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+- int last_idx, stmt_set &processed)
++ int first_idx, int last_idx,
++ stmt_set &processed)
+ {
+ gimple *last_stmt = NULL;
+- for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
++ if (first_idx == 0)
++ first_idx = mr->stmts.length () - 1;
++ for (int i = first_idx; i >= last_idx; i--)
+ {
+ if (processed.count (mr->stmts[i]))
+ continue;
+@@ -1515,6 +1526,436 @@ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+ return last_stmt;
+ }
+
++/* Check if prefetch insertion may be always unsafe in this case. For now
++ reject cases with access to arrays with no domain or with no elements. */
++
++static bool
++check_prefetch_safety (vec<memref_t *> &mrs, memref_t *cmr)
++{
++ for (unsigned int i = 0; i < mrs.length (); i++)
++ {
++ memref_t *mr = mrs[i];
++ if (mr == cmr || mr->used_mrs.empty ())
++ continue;
++ bool is_store;
++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store);
++ if (mem == NULL || TREE_CODE (*mem) != ARRAY_REF)
++ continue;
++ tree array = TREE_OPERAND (*mem, 0);
++ tree atype = TREE_TYPE (array);
++ gcc_assert (atype);
++ tree domain = TYPE_DOMAIN (atype);
++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain))
++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain)))
++ {
++ if (dump_file)
++ {
++ fprintf (dump_file, "Unsupported array type: ");
++ print_generic_expr (dump_file, atype);
++ fprintf (dump_file, "\n");
++ }
++ return false;
++ }
++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain));
++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain));
++ if (min_val == 0 && max_val == 0)
++ {
++ if (dump_file)
++ {
++ fprintf (dump_file, "Unsupported array type's bounds: ");
++ print_generic_expr (dump_file, atype);
++ fprintf (dump_file, "\n");
++ }
++ return false;
++ }
++ }
++ return true;
++}
++
++/* Collect base addresses which we need to check. */
++
++static void
++collect_base_addresses (vec<memref_t *> &used_mr_vec, HOST_WIDE_INT dist_val,
++ memref_t *comp_mr, tree_poly_offset_map &offset_map)
++{
++ if (dump_file)
++ fprintf (dump_file, "Collect base addresses which we need to check.\n");
++ for (unsigned int i = 0; i < used_mr_vec.length (); i++)
++ {
++ memref_t *mr = used_mr_vec[i];
++ if (mr == comp_mr || mr->used_mrs.empty ())
++ continue;
++ bool is_store;
++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store);
++ if (mem == NULL || TREE_CODE (*mem) != MEM_REF)
++ continue;
++ tree base = get_base_address (*mem);
++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base);
++ if (!base_addr)
++ continue;
++ if (dump_file)
++ {
++ dump_base_addr (base_addr);
++ if (base)
++ {
++ fprintf (dump_file, "Base:");
++ print_generic_expr (dump_file, base);
++ fprintf (dump_file, "\n");
++ }
++ }
++ if (!TREE_OPERAND (base, 1))
++ continue;
++ poly_offset_int curr_offset = mem_ref_offset (base);
++ poly_offset_int saved_offset = 0;
++ if (offset_map.count (base_addr))
++ {
++ saved_offset = offset_map[base_addr];
++ if ((dist_val > 0 && known_gt (curr_offset, saved_offset))
++ || (dist_val < 0 && known_lt (curr_offset, saved_offset)))
++ offset_map[base_addr] = curr_offset;
++ else if (dump_file)
++ fprintf (dump_file, "Off: step=%ld gt=%d lt=%d\n", dist_val,
++ known_gt (curr_offset, saved_offset),
++ known_lt (curr_offset, saved_offset));
++ }
++ else
++ offset_map[base_addr] = curr_offset;
++ }
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ {
++ fprintf (dump_file, "Final list of base addresses:\n");
++ for (tree_poly_offset_map::iterator it1 = offset_map.begin ();
++ it1 != offset_map.end (); ++it1)
++ {
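The diff above is truncated by the viewer. For orientation, a standalone C sketch (an assumption: simplified from the pmask logic in insert_page_check, not compiler code) of the dynamic check being inserted: a prefetch at addr + dist is only considered safe here when it stays on the same page as addr, with the page size a power of two.

#include <stdint.h>

static inline int
same_page_p (uintptr_t addr, uintptr_t dist, uintptr_t pagesize)
{
  uintptr_t pmask = ~(pagesize - 1);   /* mirrors pmask in the patch */
  return (addr & pmask) == ((addr + dist) & pmask);
}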
View file
_service:tar_scm:0288-Enable-macro-use-commandline.patch
Added
@@ -0,0 +1,207 @@
+From 7a578a8725f8fd7d92fcbbac14841ea7e8d0870f Mon Sep 17 00:00:00 2001
+From: zhangxiaohua <xiaohua20100827@163.com>
+Date: Sun, 25 Aug 2024 23:08:53 +0800
+Subject: [PATCH 157/157] Enable macro-use-commandline
+
+Signed-off-by: zhangxiaohua <xiaohua20100827@163.com>
+---
+ gcc/c-family/c-opts.cc | 4 +++
+ gcc/c-family/c.opt | 4 +++
+ gcc/doc/cppopts.texi | 4 +++
+ gcc/doc/invoke.texi | 1 +
+ .../gcc.dg/cpp/macro-use-cmdline-1.c | 26 ++++++++++++++
+ .../gcc.dg/cpp/macro-use-cmdline-2.c | 34 +++++++++++++++++++
+ libcpp/include/cpplib.h | 3 ++
+ libcpp/init.cc | 1 +
+ libcpp/macro.cc | 16 ++++++++-
+ 9 files changed, 92 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+
+diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
+index 5134f6128..744b54dc3 100644
+--- a/gcc/c-family/c-opts.cc
++++ b/gcc/c-family/c-opts.cc
+@@ -527,6 +527,10 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
+ cpp_opts->track_macro_expansion = 2;
+ break;
+
++ case OPT_fmacro_use_commandline:
++ cpp_opts->macro_use_commandline = 1;
++ break;
++
+ case OPT_fexec_charset_:
+ cpp_opts->narrow_charset = arg;
+ break;
+diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
+index 07da40ef4..a36c27f07 100644
+--- a/gcc/c-family/c.opt
++++ b/gcc/c-family/c.opt
+@@ -2012,6 +2012,10 @@ ftrack-macro-expansion=
+ C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
+ -ftrack-macro-expansion=<0|1|2> Track locations of tokens coming from macro expansion and display them in error messages.
+
++fmacro-use-commandline
++C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
++Preferentially use options from the commandline.
++
+ fpretty-templates
+ C++ ObjC++ Var(flag_pretty_templates) Init(1)
+ Do not pretty-print template specializations as the template signature followed by the arguments.
+diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
+index c0a92b370..8c8a81eac 100644
+--- a/gcc/doc/cppopts.texi
++++ b/gcc/doc/cppopts.texi
+@@ -277,6 +277,10 @@ correct column numbers in warnings or errors, even if tabs appear on the
+ line. If the value is less than 1 or greater than 100, the option is
+ ignored. The default is 8.
+
++@item -fmacro-use-commandline
++@opindex fmacro-use-commandline
++Preferentially use options from the command line.
++
+ @item -ftrack-macro-expansion@r{[}=@var{level}@r{]}
+ @opindex ftrack-macro-expansion
+ Track locations of tokens across macro expansions. This allows the
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index bdd8b9429..2ff7d860d 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -630,6 +630,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fexec-charset=@var{charset} -fextended-identifiers @gol
+ -finput-charset=@var{charset} -flarge-source-files @gol
+ -fmacro-prefix-map=@var{old}=@var{new} -fmax-include-depth=@var{depth} @gol
++-fmacro-use-commandline @gol
+ -fno-canonical-system-headers -fpch-deps -fpch-preprocess @gol
+ -fpreprocessed -ftabstop=@var{width} -ftrack-macro-expansion @gol
+ -fwide-exec-charset=@var{charset} -fworking-directory @gol
+diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+new file mode 100644
+index 000000000..f85d9c268
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c
+@@ -0,0 +1,26 @@
++/*
++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1 -DTEST_MACRO=20" }
++ { dg-do compile }
++ { dg-do run }
++*/
++
++/* { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 0 }
++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 }
++*/
++
++#if DEBUG
++extern int puts (const char *);
++#else
++#define puts(X)
++#endif
++extern void abort (void);
++
++#define err(str) do { puts(str); abort(); } while (0)
++
++int main (int argc, char *argv[])
++{
++ int macroValue = TEST_MACRO;
++ if (macroValue != 20)
++ err("macroValue");
++ return 0;
++}
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+new file mode 100644
+index 000000000..99d92d1e4
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c
+@@ -0,0 +1,34 @@
++/*
++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1" }
++ { dg-do compile }
++ { dg-do run }
++*/
++
++#define TEST_MACRO 300
++#define TEST_MACRO_1 400
++/*
++ { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 7 }
++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 }
++*/
++
++#if DEBUG
++extern int puts (const char *);
++#else
++#define puts(X)
++#endif
++
++extern void abort (void);
++
++#define err(str) do { puts(str); abort(); } while (0)
++
++int main (int argc, char *argv[])
++{
++ int macroValue = TEST_MACRO;
++ if (macroValue != 1)
++ err("macroValue");
++
++ int macroValue1 = TEST_MACRO_1;
++ if (macroValue1 != 400)
++ err("macroValue1");
++ return 0;
++}
+\ No newline at end of file
+diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
+index 3eba6f74b..c6101ca01 100644
+--- a/libcpp/include/cpplib.h
++++ b/libcpp/include/cpplib.h
+@@ -471,6 +471,9 @@ struct cpp_options
+ consumes the highest amount of memory. */
+ unsigned char track_macro_expansion;
+
++ /* Use the options on the command line first. */
++ unsigned char macro_use_commandline;
++
+ /* Nonzero means handle C++ alternate operator names. */
+ unsigned char operator_names;
+
+diff --git a/libcpp/init.cc b/libcpp/init.cc
+index f4ab83d21..47be60a36 100644
+--- a/libcpp/init.cc
++++ b/libcpp/init.cc
+@@ -215,6 +215,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
+ cpp_options::track_macro_expansion to learn about the other
+ values. 
*/ + CPP_OPTION (pfile, track_macro_expansion) = 2; ++ CPP_OPTION (pfile, macro_use_commandline) = 0; + CPP_OPTION (pfile, warn_normalize) = normalized_C; + CPP_OPTION (pfile, warn_literal_suffix) = 1; + CPP_OPTION (pfile, canonical_system_headers) +diff --git a/libcpp/macro.cc b/libcpp/macro.cc +index 8ebf360c0..aa9e4ffa6 100644 +--- a/libcpp/macro.cc ++++ b/libcpp/macro.cc +@@ -3852,7 +3852,21 @@ _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) + node->value.macro->line, 0, + "this is the location of the previous definition"); + } +- _cpp_free_definition (node); ++#define LOCATION_FROM_LINEMAP 0 ++#define MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN 96 ++#define MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN 128 ++ if (CPP_OPTION (pfile, macro_use_commandline) ++ && node->value.macro->line >= MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && node->value.macro->line <= MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && pfile->forced_token_location == LOCATION_FROM_LINEMAP) ++ { ++ cpp_pedwarning_with_line (pfile, CPP_W_NONE, ++ node->value.macro->line, 0, ++ "use the previous definition from commandline"); ++ return false; ++ }
View file
_service:tar_scm:0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch
Added
@@ -0,0 +1,35 @@
+From 63f99f46e851aecc070496a0e688a0d118c820a4 Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Mon, 2 Sep 2024 17:57:52 +0800
+Subject: [PATCH] tree-ssa-loop-crc.cc: TARGET_CRC32 may be not defined
+
+TARGET_CRC32 may be not defined on some architectures, RISC-V is one example.
+---
+ gcc/tree-ssa-loop-crc.cc | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc
+index b9c2f71ca..7eee9446d 100644
+--- a/gcc/tree-ssa-loop-crc.cc
++++ b/gcc/tree-ssa-loop-crc.cc
+@@ -1227,6 +1227,9 @@ convert_to_new_loop (class loop *loop)
+ static unsigned int
+ tree_ssa_loop_crc ()
+ {
++#ifndef TARGET_CRC32
++ return 0;
++#else
+ if (TARGET_CRC32 == false)
+ {
+ warning (OPT____,"The loop-crc optimization is not working." \
+@@ -1269,6 +1272,7 @@ tree_ssa_loop_crc ()
+ }
+ }
+ return todo;
++#endif
+ }
+
+ /* Loop crc. */
+--
+2.33.0
+
View file
_service:tar_scm:0290-Add-ipa-prefetch-test-for-gcc-s-case.patch
Added
@@ -0,0 +1,209 @@
+From 0534ae05fc313c0d449b48ffe3e01642b644e6d2 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilial@huawei-partners.com>
+Date: Fri, 6 Sep 2024 10:40:50 +0800
+Subject: [PATCH 1/2] Add ipa-prefetch test for gcc's case
+
+---
+ gcc/ipa-prefetch.cc | 4 +-
+ gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c | 167 ++++++++++++++++++++
+ 2 files changed, 170 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..8e628390b 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -1668,6 +1668,8 @@ static gimple *
+ insert_page_check (tree addr, tree_poly_offset_map &offset_map,
+ gimple_seq &stmts)
+ {
++ if (dump_file)
++ fprintf (dump_file, "Insert page check.\n");
+ poly_offset_int offset = 0;
+ if (offset_map.count (addr))
+ offset = offset_map[addr];
+@@ -1783,7 +1785,7 @@ static gimple *
+ insert_index_check (tree mem, gimple_seq &stmts)
+ {
+ if (dump_file)
+- fprintf (dump_file, "Insert array index check\n");
++ fprintf (dump_file, "Insert array index check.\n");
+ tree atype = TREE_TYPE (TREE_OPERAND (mem, 0));
+ tree ind = TREE_OPERAND (mem, 1);
+ if (decl_map->count (ind))
+diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+new file mode 100644
+index 000000000..f1001c350
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c
+@@ -0,0 +1,167 @@
++/* { dg-do link } */
++/* { dg-options "-O3 -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch" } */
++/* { dg-require-effective-target lto } */
++
++/* Based on opensource gcc code. */
++
++#include <stdbool.h>
++#include <stdlib.h>
++#include <stddef.h>
++
++#define SPARSESET_ELT_TYPE unsigned int
++#define ALLOCNO_NUM(A) ((A)->num)
++
++typedef struct sparseset_def
++{
++ SPARSESET_ELT_TYPE *dense; /* Dense array. */
++ SPARSESET_ELT_TYPE *sparse; /* Sparse array. */
++ SPARSESET_ELT_TYPE members; /* Number of elements. */
++ SPARSESET_ELT_TYPE size; /* Maximum number of elements. */
++ SPARSESET_ELT_TYPE iter; /* Iterator index. */
++ unsigned char iter_inc; /* Iteration increment amount. */
++ bool iterating;
++ SPARSESET_ELT_TYPE elms[2]; /* Combined dense and sparse arrays. */
++} *sparseset;
++
++struct ira_allocno
++{
++ /* The allocno order number starting with 0. Each allocno has an
++ unique number and the number is never changed for the
++ allocno. */
++ int num;
++ /* Regno for allocno or cap. */
++ int regno;
++ /*...*/
++};
++
++typedef struct ira_allocno_live_range *allocno_live_range_t;
++typedef struct ira_allocno *ira_allocno_t;
++
++struct ira_allocno_live_range
++{
++ /* Allocno whose live range is described by given structure. */
++ ira_allocno_t allocno;
++ /* Program point range. */
++ int start, finish;
++ /* Next structure describing program points where the allocno
++ lives. */
++ allocno_live_range_t next;
++ /* Pointer to structures with the same start/finish. */
++ allocno_live_range_t start_next, finish_next;
++};
++
++bool
++sparseset_bit_p (sparseset s, SPARSESET_ELT_TYPE e)
++{
++ SPARSESET_ELT_TYPE idx;
++
++ idx = s->sparse[e];
++
++ return idx < s->members && s->dense[idx] == e;
++}
++
++bool new_pseudos_p;
++int ira_max_point, ira_allocnos_num;
++allocno_live_range_t *ira_finish_point_ranges;
++
++static inline void
++sparseset_clear (sparseset s)
++{
++ s->members = 0;
++ s->iterating = false;
++}
++
++sparseset
++sparseset_alloc (SPARSESET_ELT_TYPE n_elms)
++{
++ unsigned int n_bytes = sizeof (struct sparseset_def)
++ + ((n_elms - 1) * 2 * sizeof (SPARSESET_ELT_TYPE));
++
++ /* We use xcalloc rather than xmalloc to silence some valgrind uninitialized
++ read errors when accessing set->sparse[n] when "n" is not, and never has
++ been, in the set. These uninitialized reads are expected, by design and
++ harmless. If this turns into a performance problem due to some future
++ additional users of sparseset, we can revisit this decision. */
++ sparseset set = (sparseset) calloc (1, n_bytes);
++ set->dense = &(set->elms[0]);
++ set->sparse = &(set->elms[n_elms]);
++ set->size = n_elms;
++ sparseset_clear (set);
++ return set;
++}
++
++void
++sparseset_insert_bit (sparseset s, SPARSESET_ELT_TYPE e, SPARSESET_ELT_TYPE idx)
++{
++ s->sparse[e] = idx;
++ s->dense[idx] = e;
++}
++
++void
++sparseset_swap (sparseset s, SPARSESET_ELT_TYPE idx1, SPARSESET_ELT_TYPE idx2)
++{
++ SPARSESET_ELT_TYPE tmp = s->dense[idx2];
++ sparseset_insert_bit (s, s->dense[idx1], idx2);
++ sparseset_insert_bit (s, tmp, idx1);
++}
++
++void __attribute__ ((noinline))
++sparseset_clear_bit (sparseset s, SPARSESET_ELT_TYPE e)
++{
++ if (sparseset_bit_p (s, e))
++ {
++ SPARSESET_ELT_TYPE idx = s->sparse[e];
++ SPARSESET_ELT_TYPE iter = s->iter;
++ SPARSESET_ELT_TYPE mem = s->members - 1;
++
++ /* If we are iterating over this set and we want to delete a
++ member we've already visited, then we swap the element we
++ want to delete with the element at the current iteration
++ index so that it plays well together with the code below
++ that actually removes the element. */
++ if (s->iterating && idx <= iter)
++ {
++ if (idx < iter)
++ {
++ sparseset_swap (s, idx, iter);
++ idx = iter;
++ }
++ s->iter_inc = 0;
++ }
++
++ /* Replace the element we want to delete with the last element
++ in the dense array and then decrement s->members, effectively
++ removing the element we want to delete. */
++ sparseset_insert_bit (s, s->dense[mem], idx);
++ s->members = mem;
++ }
++}
++
++allocno_live_range_t r;
++sparseset allocnos_live;
++
++void
++ira_flattening ()
++{
++ int i;
++
++ if (new_pseudos_p)
++ {
++ allocnos_live = sparseset_alloc (ira_allocnos_num);
++ for (i = 0; i < ira_max_point; i++)
++ {
++ for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next)
++ sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno));
++ }
++ }
++}
++
++int main()
++{
View file
_service:tar_scm:0291-Fix-settings-for-wide-operations-tests.patch
Added
@@ -0,0 +1,73 @@
+From 411792b0bbb63715d8e90d46eb4f0d9c810ce8ba Mon Sep 17 00:00:00 2001
+From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
+Date: Tue, 3 Sep 2024 21:26:03 +0800
+Subject: [PATCH 2/2] Fix settings for wide operations tests
+
+Signed-off-by: lin-houzhong <hz_lin8@163.com>
+---
+ gcc/testsuite/gcc.dg/double_sized_mul-1.c | 8 +++++---
+ gcc/testsuite/gcc.dg/double_sized_mul-2.c | 9 +++++----
+ gcc/testsuite/gcc.dg/uaddsub.c | 6 ++++--
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+index d32a25223..b848e02de 100644
+--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
++++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+@@ -1,7 +1,8 @@
+-/* { dg-do compile } */
++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */
+ /* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
+ proper overflow detection in some cases. */
+-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+@@ -138,4 +139,5 @@ uint128_t mul128_perm (uint64_t a, uint64_t b)
+ return res;
+ }
+
+-/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */
++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" { target aarch64*-*-* } } } */
++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 4 "widening_mul" { target x86_64*-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+index ff35902b7..cf8f0aedd 100644
+--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
++++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+@@ -1,7 +1,8 @@
+-/* { dg-do compile } */
+-/* fif-conversion-gimple is required for proper overflow detection
+- in some cases. */
+-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */
++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
++ proper overflow detection in some cases. */
++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
+index 96c26d308..dcb587fc8 100644
+--- a/gcc/testsuite/gcc.dg/uaddsub.c
++++ b/gcc/testsuite/gcc.dg/uaddsub.c
+@@ -1,5 +1,6 @@
+-/* { dg-do compile } */
++/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */
+ /* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */
+ #include <stdint.h>
+
+ typedef unsigned __int128 uint128_t;
+@@ -140,4 +141,5 @@ uint256_t sub256 (uint128_t a, uint128_t b)
+ }
+
+ /* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+-/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" { target aarch64*-*-* } } } */
++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 4 "optimized" { target x86_64*-*-* } } } */
+--
+2.33.0
+
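For context, a standalone C sketch of the source pattern the double_sized_mul tests exercise (an assumption based on the test names, not copied from the testsuite): a 64x64->128-bit multiply spelled out with 32-bit halves, which the widening_mul pass is expected to recognize and collapse into one double-sized multiply.

#include <stdint.h>

typedef unsigned __int128 uint128_t;

uint128_t
mul128_decomposed (uint64_t a, uint64_t b)
{
  uint64_t a_lo = (uint32_t) a, a_hi = a >> 32;
  uint64_t b_lo = (uint32_t) b, b_hi = b >> 32;
  uint64_t lo_lo = a_lo * b_lo;
  uint64_t hi_lo = a_hi * b_lo;
  uint64_t lo_hi = a_lo * b_hi;
  uint64_t hi_hi = a_hi * b_hi;
  /* Propagate carries between the four partial products.  */
  uint64_t cross = (lo_lo >> 32) + (uint32_t) hi_lo + lo_hi;
  uint64_t high = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  uint64_t low = (cross << 32) | (uint32_t) lo_lo;
  return ((uint128_t) high << 64) | low;
}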
View file
_service:tar_scm:0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch
Added
@@ -0,0 +1,42 @@
+From 808294bf0f32aaff1cc7e56a756b246d328b3402 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 6 Sep 2024 11:10:03 +0800
+Subject: [PATCH 2/3] Fix errors in ipa-prefetch (IAORPF and IAOSJ0)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..74af55af0 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -1681,7 +1681,8 @@ insert_page_check (tree addr, tree_poly_offset_map &offset_map,
+ unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1);
+ tree pmask_cst = build_int_cst (utype, pmask);
+ tree off_tree = wide_int_to_tree (sizetype, offset);
+- gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE);
++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE
++ || TREE_CODE (addr_type) == REFERENCE_TYPE);
+ tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR,
+ addr_type, addr, off_tree);
+ tree conv_addr = make_ssa_name (utype);
+@@ -2082,11 +2083,11 @@ optimize_function (cgraph_node *n, function *fn)
+ for (unsigned int i = 0; i < vmrs.length (); i++)
+ find_nearest_common_post_dominator (vmrs[i], dom_bb);
+
+- if (!dom_bb)
++ if (!dom_bb || dom_bb->index == ENTRY_BLOCK || dom_bb->index == EXIT_BLOCK)
+ {
+ if (dump_file)
+- fprintf (dump_file, "Post dominator bb for MRs is not found. "
+- "Skip the case.\n");
++ fprintf (dump_file, "Post dominator bb for MRs is not found or "
++ "it's an entry/exit block. Skip the case.\n");
+ return 0;
+ }
+ else if (dump_file)
+--
+2.33.0
+
View file
_service:tar_scm:0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch
Added
@@ -0,0 +1,51 @@
+From bfb77997f423ffe3bdcbd8bb8d7f739fe51ce4f5 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Fri, 6 Sep 2024 11:36:11 +0800
+Subject: [PATCH 3/3] Fix error with stmts insertion in ipa-prefetch (for
+ IAO6R3)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index b000d4d75..6190c2ebb 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -2096,7 +2096,7 @@ optimize_function (cgraph_node *n, function *fn)
+ fprintf (dump_file, "\n");
+ }
+
+- /* Try to find comp_mr's stmt in the dominator bb. */
++ /* Try to find comp_mr's stmt in the post dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+ gsi_prev (&si))
+@@ -2168,7 +2168,22 @@ optimize_function (cgraph_node *n, function *fn)
+ vec<gimple *> pcalls = vNULL;
+ gimple *last_pref = NULL;
+ insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts);
+- gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++
++ gimple *gstmt = gsi_stmt (gsi);
++ bool insert_after = last_used || gstmt == NULL || !is_ctrl_stmt (gstmt);
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ {
++ fprintf (dump_file, "Insert prefetch sequence %s stmt:\n",
++ insert_after ? "after": "before");
++ if (gstmt)
++ print_gimple_stmt (dump_file, gstmt, 0);
++ else
++ fprintf (dump_file, "(no stmts)\n");
++ }
++ if (insert_after)
++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++ else
++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
+
+ correct_cfg (bbends, last_pref, dom_bb);
+
+--
+2.33.0
+
View file
_service:tar_scm:0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch
Added
@@ -0,0 +1,80 @@
+From cd79fc29d2cdb73836f8699355113e94b833e0e0 Mon Sep 17 00:00:00 2001
+From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+Date: Wed, 11 Sep 2024 17:18:58 +0800
+Subject: [PATCH 2/2] Fix errors in ipa-prefetch (IAO50J and IAO5H7)
+
+Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
+---
+ gcc/ipa-prefetch.cc | 35 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
+index 5184687aa..685f9c267 100644
+--- a/gcc/ipa-prefetch.cc
++++ b/gcc/ipa-prefetch.cc
+@@ -2099,6 +2099,18 @@ optimize_function (cgraph_node *n, function *fn)
+ fprintf (dump_file, "\n");
+ }
+
++ /* Check that all used mrs dominate found post dominator bb. This case
++ may be supported later by copying MR evaluation to the bb. */
++ for (unsigned int i = 0; i < used_mr_vec.length (); i++)
++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb,
++ gimple_bb (used_mr_vec[i]->stmts[0])))
++ {
++ if (dump_file)
++ fprintf (dump_file, "MR's (%d) bb is not dominate the found bb %d. "
++ "Skip the case.\n", used_mr_vec[i]->mr_id, dom_bb->index);
++ return 0;
++ }
++
+ /* Try to find comp_mr's stmt in the post dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+@@ -2133,17 +2145,29 @@ optimize_function (cgraph_node *n, function *fn)
+
+ /* Create new inc var. Insert new_var = old_var + step * factor. */
+ decl_map = new tree_map;
+- gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
+- tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
++ gimple *old_inc_stmt = comp_mr->stmts[0];
++ gcc_assert (old_inc_stmt && gimple_assign_single_p (old_inc_stmt));
++ tree inc_var = gimple_assign_lhs (old_inc_stmt);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Old inc stmt: ");
++ print_gimple_stmt (dump_file, old_inc_stmt, 0);
++ }
+ /* If old_var definition dominates the current use, just use it, otherwise
+ evaluate it just before new inc var evaluation. */
+ gimple_seq stmts = NULL;
+ stmt_set processed_stmts;
+- if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
++ tree local_inc_var = inc_var;
++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (old_inc_stmt)))
+ {
+ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0,
+ processed_stmts);
+- inc_var = gimple_assign_lhs (tmp);
++ local_inc_var = gimple_assign_lhs (tmp);
++ if (dump_file)
++ {
++ fprintf (dump_file, "Localized old inc stmt: ");
++ print_gimple_stmt (dump_file, tmp, 0);
++ }
+ }
+ tree var_type = TREE_TYPE (inc_var);
+ enum tree_code inc_code;
+@@ -2155,7 +2179,8 @@ optimize_function (cgraph_node *n, function *fn)
+ HOST_WIDE_INT dist_val = tree_to_shwi (step)
+ * param_ipa_prefetch_distance_factor;
+ tree dist = build_int_cst (TREE_TYPE (step), dist_val);
+- tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
++ tree new_inc_var = gimple_build (&stmts, inc_code, var_type, local_inc_var,
++ dist);
+ (*decl_map)[inc_var] = new_inc_var;
+ if (dump_file)
+ {
+--
+2.33.0
+
View file
_service:tar_scm:0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch
Added
@@ -0,0 +1,30 @@
+From 7b4cce4896cefefedba9545a9633585e086b7621 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Wed, 11 Sep 2024 18:26:22 +0800
+Subject: [PATCH 1/2] Fix error with grouped_load merge in
+ slp-transpose-vectorize (for IALR8B)
+
+---
+ gcc/tree-vect-slp.cc | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index e3e246977..d4870de43 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3807,7 +3807,11 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+ these two grouped loads need to be merged. */
+ tree opb = get_op_base_address (first_element);
+ unsigned int grp_size_b = DR_GROUP_SIZE (first_element);
+- if (opa == opb && grp_size_a == grp_size_b)
++ /* Ensure that the elements merge to load group meet the alignment condition (dr_misalignment) */
++ HOST_WIDE_INT diff = 0;
++ diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr))
++ - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr)));
++ if (opa == opb && grp_size_a == grp_size_b && diff >= 0)
+ {
+ res.safe_push (first_element);
+ visited[i] = true;
+--
+2.33.0
+
View file
_service:tar_scm:0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch
Added
@@ -0,0 +1,28 @@
+From b3a6a170bf1dc0e460e98a7fd02c92e6b036784a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Fri, 13 Sep 2024 14:13:07 +0800
+Subject: [PATCH 2/2] Fix error in slp-transpose-vectorize (for IAQFM3)
+
+---
+ gcc/tree-vect-slp.cc | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d4870de43..d7e198dff 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3811,7 +3811,10 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+       HOST_WIDE_INT diff = 0;
+       diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr))
+               - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr)));
+-      if (opa == opb && grp_size_a == grp_size_b && diff >= 0)
++      if (opa == opb
++          && grp_size_a == grp_size_b
++          && diff >= 0
++          && check_same_bb (first_element, merge_first_element))
+         {
+           res.safe_push (first_element);
+           visited[i] = true;
+--
+2.33.0
+
View file
_service:tar_scm:0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch
Added
@@ -0,0 +1,26 @@
+From 8b30d71f881e15bfbc514f9b65fee178610e1536 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Wed, 18 Sep 2024 10:48:55 +0800
+Subject: [PATCH] Fix error in slp-transpose-vectorize (for IARHFM)
+
+---
+ gcc/tree-vect-slp.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d7e198dff..fbd638333 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -3814,7 +3814,8 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
+       if (opa == opb
+           && grp_size_a == grp_size_b
+           && diff >= 0
+-          && check_same_bb (first_element, merge_first_element))
++          && check_same_bb (first_element, merge_first_element)
++          && DR_PTR_INFO (first_element->dr_aux.dr) != DR_PTR_INFO (merge_first_element->dr_aux.dr))
+         {
+           res.safe_push (first_element);
+           visited[i] = true;
+--
+2.33.0
+
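Taken together, 0295-0297 incrementally tighten a single merge site in vect_slp_grouped_load_find. A condensed recap of the final guard, with the GCC internals reduced to plain parameters so the accumulated logic is visible in one place (this is a restatement of the three diffs, not new behavior):

    #include <stdbool.h>

    /* a and b are the two grouped-load leaders being considered.  */
    static bool
    can_merge_groups (const void *base_a, const void *base_b,
                      unsigned size_a, unsigned size_b,
                      long diff,              /* DR_INIT(a) - DR_INIT(b), 0295 */
                      bool same_bb,           /* check_same_bb, 0296 */
                      bool distinct_ptr_info) /* DR_PTR_INFO differs, 0297 */
    {
      return base_a == base_b
             && size_a == size_b
             && diff >= 0
             && same_bb
             && distinct_ptr_info;
    }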
View file
_service:tar_scm:0298-Mark-prefetch-builtin-as-willreturn.patch
Added
@@ -0,0 +1,99 @@
+From a252bbd11d22481a1e719ed36d800e2192abb369 Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 15:49:27 +0800
+Subject: [PATCH 1/6] Mark prefetch builtin as willreturn
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/common.opt | 4 ++++
+ gcc/gimple.cc | 30 ++++++++++++++++++++++++++++++
+ gcc/gimple.h | 1 +
+ gcc/tree-ssa-pre.cc | 4 +---
+ 4 files changed, 36 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 688d65e4d..be5fcc681 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1313,6 +1313,10 @@ fdelete-null-pointer-checks
+ Common Var(flag_delete_null_pointer_checks) Init(-1) Optimization
+ Delete useless null pointer checks.
+
++fbuiltin-will-return
++Common Var(flag_builtin_will_return) Optimization
++Consider some of the builtins as definitely returning.
++
+ fdevirtualize-at-ltrans
+ Common Var(flag_ltrans_devirtualize)
+ Stream extra data to support more aggressive devirtualization in LTO local transformation mode.
+diff --git a/gcc/gimple.cc b/gcc/gimple.cc
+index 9e62da426..04ca9f161 100644
+--- a/gcc/gimple.cc
++++ b/gcc/gimple.cc
+@@ -2998,6 +2998,36 @@ nonbarrier_call_p (gimple *call)
+   return false;
+ }
+
++static inline bool
++will_return_builtin_p (gimple *call)
++{
++  if (!flag_builtin_will_return)
++    return false;
++
++  if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
++    return false;
++
++  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
++    {
++    case BUILT_IN_PREFETCH:
++      return true;
++    default:
++      return false;
++    }
++}
++
++bool
++will_return_call_p (gimple *call, function *fun)
++{
++  int flags = gimple_call_flags (call);
++  if (!(flags & (ECF_CONST|ECF_PURE))
++      || (flags & ECF_LOOPING_CONST_OR_PURE)
++      || stmt_can_throw_external (fun, call))
++    return will_return_builtin_p (call);
++
++  return true;
++}
++
+ /* Callback for walk_stmt_load_store_ops.
+
+    Return TRUE if OP will dereference the tree stored in DATA, FALSE
+diff --git a/gcc/gimple.h b/gcc/gimple.h
+index 77a5a07e9..bb05a7664 100644
+--- a/gcc/gimple.h
++++ b/gcc/gimple.h
+@@ -1628,6 +1628,7 @@ extern bool gimple_asm_clobbers_memory_p (const gasm *);
+ extern void dump_decl_set (FILE *, bitmap);
+ extern bool nonfreeing_call_p (gimple *);
+ extern bool nonbarrier_call_p (gimple *);
++extern bool will_return_call_p (gimple *, function *);
+ extern bool infer_nonnull_range (gimple *, tree);
+ extern bool infer_nonnull_range_by_dereference (gimple *, tree);
+ extern bool infer_nonnull_range_by_attribute (gimple *, tree);
+diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
+index 98134b5d3..b5264133a 100644
+--- a/gcc/tree-ssa-pre.cc
++++ b/gcc/tree-ssa-pre.cc
+@@ -3988,9 +3988,7 @@ compute_avail (function *fun)
+                  that forbids hoisting possibly trapping expressions
+                  before it.  */
+               int flags = gimple_call_flags (stmt);
+-              if (!(flags & (ECF_CONST|ECF_PURE))
+-                  || (flags & ECF_LOOPING_CONST_OR_PURE)
+-                  || stmt_can_throw_external (fun, stmt))
++              if (!will_return_call_p (stmt, fun))
+                 /* Defer setting of BB_MAY_NOTRETURN to avoid it
+                    influencing the processing of the call itself.  */
+                 set_bb_may_notreturn = true;
+--
+2.33.0
+
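The effect is easiest to see on a loop where a prefetch call sits next to otherwise hoistable code. A sketch under the flag added above (the flag spelling comes from the common.opt hunk; the loop itself is made up):

    /* Without -fbuiltin-will-return, PRE must assume the call might not
       return and is more conservative inside the loop; with it,
       BUILT_IN_PREFETCH is treated as definitely returning.
       Compile: gcc -O2 -fbuiltin-will-return test.c  */
    void
    accumulate (int *a, const int *b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          __builtin_prefetch (&b[i + 16]);
          *a += b[i];
        }
    }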
View file
_service:tar_scm:0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch
Added
@@ -0,0 +1,156 @@
+From 3b109376d057342a31267ea4c9bd422d940874cb Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Thu, 31 Oct 2024 16:09:43 +0800
+Subject: [PATCH 2/6] [Backport]Disallow pointer operands for |,^ and partly
+ & [PR106878]
+
+Signed-off-by: Jakub Jelinek <jakub@redhat.com>
+---
+ gcc/match.pd | 6 ++++-
+ .../gcc.c-torture/compile/pr106878.c | 15 +++++++++++++
+ gcc/tree-cfg.cc | 22 ++++++++++++++++---
+ gcc/tree-ssa-reassoc.cc | 16 +++++++++++++-
+ 4 files changed, 54 insertions(+), 5 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106878.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 8f41c292f..822e065e8 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -1655,6 +1655,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+            && (int_fits_type_p (@1, TREE_TYPE (@0))
+                || tree_nop_conversion_p (TREE_TYPE (@0), type)))
+           || types_match (@0, @1))
++      && !POINTER_TYPE_P (TREE_TYPE (@0))
++      && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE
+       /* ??? This transform conflicts with fold-const.cc doing
+          Convert (T)(x & c) into (T)x & (T)c, if c is an integer
+          constants (if x has signed type, the sign bit cannot be set
+@@ -1691,7 +1693,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+   (if (GIMPLE
+        && TREE_CODE (@1) != INTEGER_CST
+        && tree_nop_conversion_p (type, TREE_TYPE (@2))
+-       && types_match (type, @0))
++       && types_match (type, @0)
++       && !POINTER_TYPE_P (TREE_TYPE (@0))
++       && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE)
+    (bitop @0 (convert @1)))))
+
+ (for bitop (bit_and bit_ior)
+diff --git a/gcc/testsuite/gcc.c-torture/compile/pr106878.c b/gcc/testsuite/gcc.c-torture/compile/pr106878.c
+new file mode 100644
+index 000000000..c84571894
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/compile/pr106878.c
+@@ -0,0 +1,15 @@
++/* PR tree-optimization/106878 */
++
++typedef __INTPTR_TYPE__ intptr_t;
++typedef __UINTPTR_TYPE__ uintptr_t;
++int a;
++
++int
++foo (const int *c)
++{
++  uintptr_t d = ((intptr_t) c | (intptr_t) &a) & 65535 << 16;
++  intptr_t e = (intptr_t) c;
++  if (d != (e & 65535 << 16))
++    return 1;
++  return 0;
++}
+diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
+index 48b52f785..d33aaec8c 100644
+--- a/gcc/tree-cfg.cc
++++ b/gcc/tree-cfg.cc
+@@ -4163,7 +4163,9 @@ verify_gimple_assign_binary (gassign *stmt)
+     case ROUND_MOD_EXPR:
+     case RDIV_EXPR:
+     case EXACT_DIV_EXPR:
+-      /* Disallow pointer and offset types for many of the binary gimple. */
++    case BIT_IOR_EXPR:
++    case BIT_XOR_EXPR:
++      /* Disallow pointer and offset types for many of the binary gimple. */
+       if (POINTER_TYPE_P (lhs_type)
+           || TREE_CODE (lhs_type) == OFFSET_TYPE)
+         {
+@@ -4178,9 +4180,23 @@ verify_gimple_assign_binary (gassign *stmt)
+
+     case MIN_EXPR:
+     case MAX_EXPR:
+-    case BIT_IOR_EXPR:
+-    case BIT_XOR_EXPR:
++      /* Continue with generic binary expression handling.  */
++      break;
++
+     case BIT_AND_EXPR:
++      if (POINTER_TYPE_P (lhs_type)
++          && TREE_CODE (rhs2) == INTEGER_CST)
++        break;
++      /* Disallow pointer and offset types for many of the binary gimple. */
++      if (POINTER_TYPE_P (lhs_type)
++          || TREE_CODE (lhs_type) == OFFSET_TYPE)
++        {
++          error ("invalid types for %qs", code_name);
++          debug_generic_expr (lhs_type);
++          debug_generic_expr (rhs1_type);
++          debug_generic_expr (rhs2_type);
++          return true;
++        }
+       /* Continue with generic binary expression handling.  */
+       break;
+
+diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
+index e3d521e32..6baef4764 100644
+--- a/gcc/tree-ssa-reassoc.cc
++++ b/gcc/tree-ssa-reassoc.cc
+@@ -3617,10 +3617,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+       tree type2 = NULL_TREE;
+       bool strict_overflow_p = false;
+       candidates.truncate (0);
++      if (POINTER_TYPE_P (type1))
++        type1 = pointer_sized_int_node;
+       for (j = i; j; j = chains[j - 1])
+         {
+           tree type = TREE_TYPE (ranges[j - 1].exp);
+           strict_overflow_p |= ranges[j - 1].strict_overflow_p;
++          if (POINTER_TYPE_P (type))
++            type = pointer_sized_int_node;
+           if ((b % 4) == 3)
+             {
+               /* For the signed < 0 cases, the types should be
+@@ -3651,6 +3655,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+           tree type = TREE_TYPE (ranges[j - 1].exp);
+           if (j == k)
+             continue;
++          if (POINTER_TYPE_P (type))
++            type = pointer_sized_int_node;
+           if ((b % 4) == 3)
+             {
+               if (!useless_type_conversion_p (type1, type))
+@@ -3680,7 +3686,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+               op = r->exp;
+               continue;
+             }
+-          if (id == l)
++          if (id == l || POINTER_TYPE_P (TREE_TYPE (op)))
+             {
+               code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
+               g = gimple_build_assign (make_ssa_name (type1), code, op);
+@@ -3704,6 +3710,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
+               gimple_seq_add_stmt_without_update (&seq, g);
+               op = gimple_assign_lhs (g);
+             }
++          type1 = TREE_TYPE (ranges[k - 1].exp);
++          if (POINTER_TYPE_P (type1))
++            {
++              gimple *g
++                = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
++              gimple_seq_add_stmt_without_update (&seq, g);
++              op = gimple_assign_lhs (g);
++            }
+           candidates.pop ();
+           if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
+                                  candidates.length (), opcode, ops, op,
+--
+2.33.0
+
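For source code the practical idiom is unchanged: do the bitwise arithmetic on integers, as pr106878.c above already does. What the stricter verifier rejects is GIMPLE that applies | or ^ directly to pointer or offset types, while BIT_AND_EXPR keeps a narrow exception for the pointer-with-constant-mask form. A small hedged illustration (my example, not from the patch):

    typedef __UINTPTR_TYPE__ uptr;

    /* Fine: |, ^ and & applied only after casting to an integer type.  */
    uptr
    mix (const int *p, const int *q)
    {
      return ((uptr) p | (uptr) q) ^ ((uptr) p & (uptr) q);
    }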
View file
_service:tar_scm:0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch
Added
@@ -0,0 +1,55 @@
+From 91ef8899a80e493042fd2687ad89064c9f90cf17 Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 16:14:34 +0800
+Subject: [PATCH 3/6] Remove erroneous pattern from gimple ifcvt
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/match.pd | 2 +-
+ gcc/testsuite/gcc.dg/ifcvt-gimple-1.c | 21 +++++++++++++++++++++
+ 2 files changed, 22 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+
+diff --git a/gcc/match.pd b/gcc/match.pd
+index 8f41c292f..2dd6581d1 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4276,7 +4276,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+
+ (if (flag_if_conversion_gimple)
+- (for simple_op (plus minus bit_and bit_ior bit_xor)
++ (for simple_op (plus minus bit_ior bit_xor)
+   (simplify
+    (cond @0 (simple_op @1 INTEGER_CST@2) @1)
+    (switch
+diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+new file mode 100644
+index 000000000..381a4ad51
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c
+@@ -0,0 +1,21 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -fno-inline -fif-conversion-gimple" } */
++
++#include <stdlib.h>
++
++void foo(int a, int *p) {
++  *p = a;
++}
++
++void verify (int a) {
++  if (a != 3)
++    abort ();
++}
++
++int main() {
++  int a = 0;
++  foo (3, &a);
++  int tmp = (a > 7) ? a & 1 : a;
++  verify (tmp);
++  return 0;
++}
+--
+2.33.0
+
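One plausible way to read the removed pattern's flaw: `cond ? a OP C : a` can be rewritten branchlessly as `a OP (cond ? C : 0)` only when 0 is the identity element of OP, which holds for plus, minus, bit_ior and bit_xor but not for bit_and (whose identity is all-ones). The retained testcase encodes exactly that: with a = 3, `(a > 7) ? a & 1 : a` must stay 3. A tiny standalone check of the same arithmetic (my sketch, not part of the patch):

    #include <assert.h>

    int
    main (void)
    {
      int a = 3, cond = 0;
      int want = cond ? (a & 1) : a;   /* 3 */
      int bad  = a & (cond ? 1 : 0);   /* 0: the bit_and rewrite is wrong */
      assert (want == 3 && bad == 0);
      return 0;
    }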
View file
_service:tar_scm:0301-Add-required-check-for-iteration-through-uses.patch
Added
@@ -0,0 +1,33 @@
+From ca24d352e98e357f4f7b8f0d262201765705a08a Mon Sep 17 00:00:00 2001
+From: Pronin Alexander <pronin.alexander@huawei.com>
+Date: Thu, 31 Oct 2024 16:31:33 +0800
+Subject: [PATCH 4/6] Add required check for iteration through uses
+
+Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com>
+---
+ gcc/tree-ssa-math-opts.cc | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
+index 2c06b8a60..80c06fa01 100644
+--- a/gcc/tree-ssa-math-opts.cc
++++ b/gcc/tree-ssa-math-opts.cc
+@@ -4938,8 +4938,13 @@ convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt)
+
+   /* Find the mult low part getter.  */
+   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3])
+-    if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
+-      break;
++    {
++      if (!is_gimple_assign (use_stmt))
++        continue;
++
++      if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
++        break;
++    }
+
+   /* Create high and low (if needed) parts extractors.  */
+   /* Low part.  */
+--
+2.33.0
+
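FOR_EACH_IMM_USE_STMT visits every statement that uses an SSA name, and those uses need not be assignments: they can be calls, PHIs or returns, on which gimple_assign_rhs_code must not be asked for an rhs code. A hypothetical C shape that gives the double-size multiply a non-assignment immediate use (the testcase is made up; only convert_double_size_mul is from the patch):

    extern void sink (unsigned __int128);

    unsigned long
    mul_hi (unsigned long x, unsigned long y)
    {
      unsigned __int128 p = (unsigned __int128) x * y;
      sink (p);                        /* immediate use that is a call */
      return (unsigned long) (p >> 64);
    }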
View file
_service:tar_scm:0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch
Added
@@ -0,0 +1,158 @@
+From 210147e28d542a03588ba3c3fa473301a03bb687 Mon Sep 17 00:00:00 2001
+From: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com>
+Date: Thu, 31 Oct 2024 16:45:15 +0800
+Subject: [PATCH 6/6] Added param for optimization for merging bb's with cheap
+ insns. Zero param means turned off optimization (default implementation),
+ one means turned on
+
+Signed-off-by: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com>
+---
+ gcc/params.opt | 4 +++
+ gcc/testsuite/gcc.dg/if_comb1.c | 13 +++++++++
+ gcc/testsuite/gcc.dg/if_comb2.c | 13 +++++++++
+ gcc/testsuite/gcc.dg/if_comb3.c | 12 +++++++++
+ gcc/tree-ssa-ifcombine.cc | 47 ++++++++++++++++++++++++++++++---
+ 5 files changed, 86 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb1.c
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb2.c
+ create mode 100644 gcc/testsuite/gcc.dg/if_comb3.c
+
+diff --git a/gcc/params.opt b/gcc/params.opt
+index fc700ab79..3ddfaf5b2 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -789,6 +789,10 @@ Maximum number of VALUEs handled during a single find_base_term call.
+ Common Joined UInteger Var(param_max_vrp_switch_assertions) Init(10) Param Optimization
+ Maximum number of assertions to add along the default edge of a switch statement during VRP.
+
++-param=merge-assign-stmts-ifcombine=
++Common Joined UInteger Var(param_merge_assign_stmts_ifcombine) Init(0) IntegerRange(0, 1) Param Optimization
++Whether bb's with cheap gimple_assign stmts should be merged in the ifcombine pass.
++
+ -param=min-crossjump-insns=
+ Common Joined UInteger Var(param_min_crossjump_insns) Init(5) IntegerRange(1, 65536) Param Optimization
+ The minimum number of matching instructions to consider for crossjumping.
+diff --git a/gcc/testsuite/gcc.dg/if_comb1.c b/gcc/testsuite/gcc.dg/if_comb1.c
+new file mode 100644
+index 000000000..e00adc37d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb1.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (double a, double b, int c)
++{
++  if (c < 10 || a - b > 1.0)
++    return 0;
++  else
++    return 1;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/testsuite/gcc.dg/if_comb2.c b/gcc/testsuite/gcc.dg/if_comb2.c
+new file mode 100644
+index 000000000..176e7e726
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb2.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (int a, int b, int c)
++{
++  if (a > 1 || b * c < 10)
++    return 0;
++  else
++    return 1;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/testsuite/gcc.dg/if_comb3.c b/gcc/testsuite/gcc.dg/if_comb3.c
+new file mode 100644
+index 000000000..aa2e4510c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/if_comb3.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */
++
++int foo (int a, int b, int c)
++{
++  if (a > 1 && b + c < 10)
++    a++;
++  return a;
++}
++
++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */
++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */
+diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
+index ce9bbebf9..264a8bcae 100644
+--- a/gcc/tree-ssa-ifcombine.cc
++++ b/gcc/tree-ssa-ifcombine.cc
+@@ -110,6 +110,18 @@ recognize_if_then_else (basic_block cond_bb,
+   return true;
+ }
+
++/* Verify if gimple insn cheap for param=merge-assign-stmts-ifcombine
++   optimization.  */
++
++bool is_insn_cheap (enum tree_code t)
++{
++  static enum tree_code cheap_insns[] = {MULT_EXPR, PLUS_EXPR, MINUS_EXPR};
++  for (int i = 0; i < sizeof (cheap_insns)/sizeof (enum tree_code); i++)
++    if (t == cheap_insns[i])
++      return 1;
++  return 0;
++}
++
+ /* Verify if the basic block BB does not have side-effects.  Return
+    true in this case, else false.  */
+
+@@ -572,9 +584,38 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
+         = param_logical_op_non_short_circuit;
+       if (!logical_op_non_short_circuit || sanitize_coverage_p ())
+         return false;
+-      /* Only do this optimization if the inner bb contains only the conditional. */
+-      if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
+-        return false;
++      if (param_merge_assign_stmts_ifcombine)
++        {
++          int number_cheap_insns = 0;
++          int number_conds = 0;
++          for (auto i = gsi_start_nondebug_after_labels_bb
++               (outer_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i))
++            if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN
++                && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i))))
++              number_cheap_insns++;
++            else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND)
++              number_conds++;
++          for (auto i = gsi_start_nondebug_after_labels_bb
++               (inner_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i))
++            if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN
++                && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i))))
++              number_cheap_insns++;
++            else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND)
++              number_conds++;
++          if (!(number_cheap_insns == 1 && number_conds == 2)
++              && !gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb
++                                        (inner_cond_bb)))
++            return false;
++        }
++      else
++        {
++          /* Only do this optimization if the inner bb contains
++             only the conditional.  */
++          if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb
++                                     (inner_cond_bb)))
++            return false;
++        }
++
+       t1 = fold_build2_loc (gimple_location (inner_cond),
+                             inner_cond_code,
+                             boolean_type_node,
+--
+2.33.0
+
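Usage follows the included testcases: the param (default 0, i.e. prior behavior) lets ifcombine merge blocks that contain, besides the two conditions, exactly one cheap assignment (a multiply, plus or minus). For example:

    /* Compile with:
       gcc -O2 --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine t.c
       The b * c statement no longer blocks combining the two conditions.  */
    int
    foo (int a, int b, int c)
    {
      return (a > 1 || b * c < 10) ? 0 : 1;
    }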
View file
_service:tar_scm:0303-Add-generation-of-stream-in-functions-for-pre-versio.patch
Added
@@ -0,0 +1,6263 @@
+From 4789a6eae616df0b7d07901114c91a2099e4d56d Mon Sep 17 00:00:00 2001
+From: wangchunyang <wangchunyang15@huawei.com>
+Date: Wed, 13 Nov 2024 11:26:16 +0800
+Subject: [PATCH 1/2] Add generation of stream in functions for pre-version lto
+ objects
+
+---
+ gcc/lto-streamer.h | 6 +
+ gcc/opt-read.awk | 1 +
+ gcc/optc-save-gen.awk | 6044 ++++++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 6050 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
+index 597e9e405..9db1a20b3 100644
+--- a/gcc/lto-streamer.h
++++ b/gcc/lto-streamer.h
+@@ -943,12 +943,18 @@ void cl_target_option_stream_in (class data_in *,
+                                  struct bitpack_d *,
+                                  struct cl_target_option *);
+
++void cl_target_option_stream_in_prev (class data_in *,
++                                      struct bitpack_d *,
++                                      struct cl_target_option *);
++
+ void cl_optimization_stream_out (struct output_block *,
+                                  struct bitpack_d *, struct cl_optimization *);
+
+ void cl_optimization_stream_in (class data_in *,
+                                 struct bitpack_d *, struct cl_optimization *);
+
++void cl_optimization_stream_in_prev (class data_in *,
++                                     struct bitpack_d *, struct cl_optimization *);
+
+
+ /* In lto-opts.cc.  */
+diff --git a/gcc/opt-read.awk b/gcc/opt-read.awk
+index ce3617c8d..624cf6e3d 100644
+--- a/gcc/opt-read.awk
++++ b/gcc/opt-read.awk
+@@ -71,6 +71,7 @@ BEGIN {
+                 n_target_save++
+
+                 extra_target_vars[n_extra_target_vars] = name
++                extra_target_vars_set[name] = 1
+                 extra_target_var_types[n_extra_target_vars] = type
+                 n_extra_target_vars++
+         }
+diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
+index 76e9b3cb9..7c012dd4e 100644
+--- a/gcc/optc-save-gen.awk
++++ b/gcc/optc-save-gen.awk
+@@ -174,6 +174,8 @@ print "  unsigned HOST_WIDE_INT mask = 0;";
+ j = 0;
+ k = 0;
+ for (i = 0; i < n_opt_other; i++) {
++        var_opt_other_j[var_opt_other[i]] = j;
++        var_opt_other_k[var_opt_other[i]] = k;
+         print "  if (opts_set->x_" var_opt_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -185,6 +187,8 @@ for (i = 0; i < n_opt_other; i++) {
+ }
+
+ for (i = 0; i < n_opt_int; i++) {
++        var_opt_int_j[var_opt_int[i]] = j;
++        var_opt_int_k[var_opt_int[i]] = k;
+         print "  if (opts_set->x_" var_opt_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -196,6 +200,8 @@ for (i = 0; i < n_opt_int; i++) {
+ }
+
+ for (i = 0; i < n_opt_enum; i++) {
++        var_opt_enum_j[var_opt_enum[i]] = j;
++        var_opt_enum_k[var_opt_enum[i]] = k;
+         print "  if (opts_set->x_" var_opt_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -207,6 +213,8 @@ for (i = 0; i < n_opt_enum; i++) {
+ }
+
+ for (i = 0; i < n_opt_short; i++) {
++        var_opt_short_j[var_opt_short[i]] = j;
++        var_opt_short_k[var_opt_short[i]] = k;
+         print "  if (opts_set->x_" var_opt_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -218,6 +226,8 @@ for (i = 0; i < n_opt_short; i++) {
+ }
+
+ for (i = 0; i < n_opt_char; i++) {
++        var_opt_char_j[var_opt_char[i]] = j;
++        var_opt_char_k[var_opt_char[i]] = k;
+         print "  if (opts_set->x_" var_opt_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -229,6 +239,8 @@ for (i = 0; i < n_opt_char; i++) {
+ }
+
+ for (i = 0; i < n_opt_string; i++) {
++        var_opt_string_j[var_opt_string[i]] = j;
++        var_opt_string_k[var_opt_string[i]] = k;
+         print "  if (opts_set->x_" var_opt_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -604,6 +616,8 @@ for (i = 0; i < n_extra_target_vars; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        extra_target_vars_j[extra_target_vars[i]] = j;
++        extra_target_vars_k[extra_target_vars[i]] = k;
+         print "  if (opts_set->x_" extra_target_vars[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -622,6 +636,8 @@ for (i = 0; i < n_target_other; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_other_j[var_target_other[i]] = j;
++        var_target_other_k[var_target_other[i]] = k;
+         print "  if (opts_set->x_" var_target_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -636,6 +652,8 @@ for (i = 0; i < n_target_enum; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_enum_j[var_target_enum[i]] = j;
++        var_target_enum_k[var_target_enum[i]] = k;
+         print "  if (opts_set->x_" var_target_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -654,6 +672,8 @@ for (i = 0; i < n_target_int; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_int_j[var_target_int[i]] = j;
++        var_target_int_k[var_target_int[i]] = k;
+         print "  if (opts_set->x_" var_target_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -668,6 +688,8 @@ for (i = 0; i < n_target_short; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_short_j[var_target_short[i]] = j;
++        var_target_short_k[var_target_short[i]] = k;
+         print "  if (opts_set->x_" var_target_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -682,6 +704,8 @@ for (i = 0; i < n_target_char; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_char_j[var_target_char[i]] = j;
++        var_target_char_k[var_target_char[i]] = k;
+         print "  if (opts_set->x_" var_target_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -696,6 +720,8 @@ for (i = 0; i < n_target_string; i++) {
+         if (j == 0 && k == 0) {
+                 print "  unsigned HOST_WIDE_INT mask = 0;";
+         }
++        var_target_string_j[var_target_string[i]] = j;
++        var_target_string_k[var_target_string[i]] = k;
+         print "  if (opts_set->x_" var_target_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";";
+         j++;
+         if (j == 64) {
+@@ -1038,6 +1064,7 @@ for (i = 0; i < n_target_save; i++) {
+         sub(" *" name "$", "", type)
+         if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$") {
+                 var_target_str[n_target_str++] = name;
++                var_target_str_set[name] = 1;
+                 string_options_names[name]++
+         }
+         else {
+@@ -1048,12 +1075,14 @@ for (i = 0; i < n_target_save; i++) {
+                 sub("\\[.+", "", name)
+                 sub(" [^ ]+$", "", type)
+                 var_target_array[n_target_array] = name
++                var_target_array_set[name] = 1
+                 var_target_array_type[n_target_array] = type
+                 var_target_array_size[n_target_array++] = size
+         }
+         else {
+                 var_target_val_type[n_target_val] = type;
+                 var_target_val[n_target_val++] = name;
++                var_target_val_set[name] = 1;
+         }
+ }
+ }
+@@ -1069,17 +1098,21 @@ if (have_save) {
+
+                 var_list_seen[name]++;
+                 otype = var_type_struct(flags[i])
+-                if (otype ~ "^const char \\**$")
++                if (otype ~ "^const char \\**$") {
+                         var_target_str[n_target_str++] = "x_" name;
++                        var_target_str_set["x_" name] = 1;
View file
_service:tar_scm:0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch
Added
@@ -0,0 +1,963 @@
+From f81a5b294711e3a420fe66702f0d9221332271c4 Mon Sep 17 00:00:00 2001
+From: h00564365 <huangxiaoquan1@huawei.com>
+Date: Wed, 13 Nov 2024 17:18:01 +0800
+Subject: [PATCH 2/2] Add multi-version lto symbol parse, cross lto units
+ ipa-inline extension, and lto compression algorithm specified.
+
+---
+ gcc/common.opt | 20 +++
+ gcc/config/aarch64/aarch64.cc | 41 ++++++
+ gcc/doc/tm.texi | 6 +
+ gcc/doc/tm.texi.in | 2 +
+ gcc/ipa-inline.cc | 141 ++++++++++++++++++-
+ gcc/lto-compress.cc | 6 +-
+ gcc/lto-section-in.cc | 5 +
+ gcc/lto-streamer-out.cc | 7 +-
+ gcc/lto-wrapper.cc | 4 +
+ gcc/optc-save-gen.awk | 57 ++++++++
+ gcc/opth-gen.awk | 3 +
+ gcc/opts.cc | 46 ++++++
+ gcc/target.def | 10 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c | 6 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c | 5 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c | 15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c | 10 ++
+ gcc/tree-streamer-in.cc | 58 +++++++-
+ lto-plugin/lto-plugin.c | 83 +++++++++++
+ 21 files changed, 547 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index be5fcc681..78cfc333a 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1928,6 +1928,21 @@ finline-atomics
+ Common Var(flag_inline_atomics) Init(1) Optimization
+ Inline __atomic operations when a lock free instruction sequence is available.
+
++fmulti-version-lib=
++Common Joined Var(multi_version_lib_string)
++Use specify LTO stream in mode for specified target (object or lib). If there
++are multiple target files, use commas (,) to separate them and without spaces.
++
++finline-force
++Common Var(flag_inline_force) Init(0) Optimization
++Force perform ipa inline when march options are incompatible between functions.
++
++finline-force=
++Common Joined Var(force_inline_targets_string)
++Force perform ipa inline specified target(object or lib) when march options are
++incompatible between functions. If there are multiple target files, use commas
++(,) to separate them and without spaces.
++
+ fcf-protection
+ Common RejectNegative Alias(fcf-protection=,full)
+
+@@ -2168,6 +2183,11 @@ flto-partition=
+ Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
+ Specify the algorithm to partition symbols and vars at linktime.
+
++flto-compression-algorithm=
++Common Joined Var(lto_compression_algorithm)
++-flto-compression-algorithm=<format>	Generate lto compression in zlib/zstd
++format <format>.
++
+ ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
+ flto-compression-level=
+ Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 19)
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 025a3c478..f095f17aa 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -20829,6 +20829,44 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
+            arch->name, extension.c_str ());
+ }
+
++/* Implement TARGET_OPTION_PRINT_DIFF.  */
++
++static void
++aarch64_option_print_diff (FILE *file, int indent,
++                           struct cl_target_option *ptr1,
++                           struct cl_target_option *ptr2)
++{
++  const char *const cpu1
++    = aarch64_get_tune_cpu (ptr1->x_selected_tune)->name;
++  const struct processor *arch1 = aarch64_get_arch (ptr1->x_selected_arch);
++  std::string extension1
++    = aarch64_get_extension_string_for_isa_flags (ptr1->x_aarch64_isa_flags,
++                                                  arch1->flags);
++
++  const char *const cpu2
++    = aarch64_get_tune_cpu (ptr2->x_selected_tune)->name;
++  const struct processor *arch2 = aarch64_get_arch (ptr2->x_selected_arch);
++  std::string extension2
++    = aarch64_get_extension_string_for_isa_flags (ptr2->x_aarch64_isa_flags,
++                                                  arch2->flags);
++
++  if (cpu1 != cpu2 && (!cpu1 || !cpu2 || strcmp (cpu1, cpu2)))
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "cpu", cpu1 ? cpu1 : "(null)", cpu2 ? cpu2 : "(null)");
++
++  if (arch1->name != arch2->name
++      && (!arch1->name || !arch2->name || strcmp (arch1->name, arch2->name)))
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "arch", arch1->name ? arch1->name : "(null)",
++             arch2->name ? arch2->name : "(null)");
++
++  if (extension1 != extension2)
++    fprintf (file, "%*s%s (%s/%s)\n", indent, "",
++             "extension",
++             extension1.empty () ? "(null)" : extension1.c_str (),
++             extension2.empty () ? "(null)" : extension2.c_str ());
++}
++
+ static GTY(()) tree aarch64_previous_fndecl;
+
+ void
+@@ -31161,6 +31199,9 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_OPTION_PRINT
+ #define TARGET_OPTION_PRINT aarch64_option_print
+
++#undef TARGET_OPTION_PRINT_DIFF
++#define TARGET_OPTION_PRINT_DIFF aarch64_option_print_diff
++
+ #undef TARGET_OPTION_VALID_ATTRIBUTE_P
+ #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 1e96521e6..50bbbbc42 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10589,6 +10589,12 @@ information in the @code{struct cl_target_option} structure for
+ function-specific options.
+ @end deftypefn
+
++@deftypefn {Target Hook} void TARGET_OPTION_PRINT_DIFF (FILE *@var{file}, int @var{indent}, struct cl_target_option *@var{ptr1}, struct cl_target_option *@var{ptr2})
++This hook is called to print diff additional target-specific
++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for
++function-specific options.
++@end deftypefn
++
+ @deftypefn {Target Hook} bool TARGET_OPTION_PRAGMA_PARSE (tree @var{args}, tree @var{pop_target})
+ This target hook parses the options for @code{#pragma GCC target}, which
+ sets the target-specific options for functions that occur later in the
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index 2dd515659..cfda60304 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6985,6 +6985,8 @@ on this implementation detail.
+
+ @hook TARGET_OPTION_PRINT
+
++@hook TARGET_OPTION_PRINT_DIFF
++
+ @hook TARGET_OPTION_PRAGMA_PARSE
+
+ @hook TARGET_OPTION_OVERRIDE
+diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
+index f8bb072c4..8d5cc9a84 100644
+--- a/gcc/ipa-inline.cc
++++ b/gcc/ipa-inline.cc
+@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3.  If not see
+    the need for offline copy of the function.  */
+
+ #include "config.h"
++#define INCLUDE_SET
++#define INCLUDE_STRING
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -127,6 +129,7 @@ typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
+ static int overall_size;
+ static profile_count max_count;
+ static profile_count spec_rem;
++static std::set<std::string> force_inline_targets;
+
+ /* Return false when inlining edge E would lead to violating
+    limits on function unit growth or stack usage growth.
+@@ -222,6 +225,38 @@ caller_growth_limits (struct cgraph_edge *e)
+   return true;
+ }
+
++/* Warn and prompt the user, and output only once for the file pair where
++   the function is located.  */
++
++static void
++prompt_inline_failed_target_option_reason (struct cgraph_edge *e)
++{
++  static std::set<std::pair<void*, void*>> address_pair_set;
++  if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH
++      && !cl_target_option_eq_major (target_opts_for_fn (e->caller->decl),
++         target_opts_for_fn (e->callee->ultimate_alias_target ()->decl))
++      && e->caller->lto_file_data
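A guess at the intended workflow, pieced together from the option texts above; only the option spellings come from the common.opt hunk, the file names and command lines are hypothetical:

    /* main.c -- to be linked against an LTO object (hot.o) produced by a
       different gcc version or with different -march options.
       Possible build, assuming the options behave as documented above:
         gcc -O2 -flto -flto-compression-algorithm=zstd -c main.c
         gcc -O2 -flto -fmulti-version-lib=hot.o -finline-force=hot.o \
             main.o hot.o -o app                                        */
    extern int hot (int x);   /* defined in the foreign LTO object */

    int
    main (void)
    {
      return hot (42);
    }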
View file
_service:tar_scm:0305-Backport-varasm-Handle-private-COMDAT-function-symbo.patch
Added
@@ -0,0 +1,296 @@
+From bbb4954294d010977fcfb96931384101cf015a44 Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Mon, 26 Feb 2024 17:55:07 +0100
+Subject: [PATCH] [Backport]varasm: Handle private COMDAT function symbol
+ reference in readonly data section [PR113617]
+
+If default_elf_select_rtx_section is called to put a reference to some
+local symbol defined in a comdat section into memory, which happens more often
+since the r14-4944 RA change, linking might fail.
+default_elf_select_rtx_section puts such constants into .data.rel.ro.local
+etc. sections and if linker chooses comdat sections from some other TU
+and discards the one to which a relocation in .data.rel.ro.local remains,
+linker diagnoses error.  References to private comdat symbols can only appear
+from functions or data objects in the same comdat group, so the following
+patch arranges using .data.rel.ro.local.pool.<comdat_name> and similar sections.
+
+2024-02-26  Jakub Jelinek  <jakub@redhat.com>
+            H.J. Lu  <hjl.tools@gmail.com>
+
+        PR rtl-optimization/113617
+        * varasm.cc (default_elf_select_rtx_section): For
+        references to private symbols in comdat sections
+        use .data.relro.local.pool.<comdat>, .data.relro.pool.<comdat>
+        or .rodata.<comdat> comdat sections.
+
+        * g++.dg/other/pr113617.C: New test.
+        * g++.dg/other/pr113617.h: New test.
+        * g++.dg/other/pr113617-aux.cc: New test.
+---
+ gcc/testsuite/g++.dg/other/pr113617-aux.cc | 9 ++
+ gcc/testsuite/g++.dg/other/pr113617.C | 27 +++++
+ gcc/testsuite/g++.dg/other/pr113617.h | 132 +++++++++++++++++++++
+ gcc/varasm.cc | 48 +++++++-
+ 4 files changed, 215 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617-aux.cc
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.C
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.h
+
+diff --git a/gcc/testsuite/g++.dg/other/pr113617-aux.cc b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+new file mode 100644
+index 000000000..e6900e05a
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+@@ -0,0 +1,9 @@
++// PR rtl-optimization/113617
++// { dg-do link { target { c++17 && c++14_down } } }
++
++#include "pr113617.h"
++
++void qux() {
++  A<long long> a;
++  a.foo(0, 0);
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.C b/gcc/testsuite/g++.dg/other/pr113617.C
+new file mode 100644
+index 000000000..a02dda142
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.C
+@@ -0,0 +1,27 @@
++// PR rtl-optimization/113617
++// { dg-do link { target c++11 } }
++// { dg-options "-O2" }
++// { dg-additional-options "-fPIC" { target fpic } } */
++// { dg-additional-options "-shared" { target shared } } */
++// { dg-additional-sources pr113617-aux.cc }
++
++#include "pr113617.h"
++
++int z;
++long xx1;
++void corge() {
++  A<long long> a;
++  a.foo(xx1, 0);
++}
++
++typedef unsigned long int VV __attribute__((vector_size (2 * sizeof (long))));
++VV vv;
++__attribute__((noipa)) static void fn1 (void) {}
++__attribute__((noipa)) static void fn2 (void) {}
++
++void
++fn3 ()
++{
++  VV a = { (unsigned long) &fn1, (unsigned long) &fn2 };
++  vv = a;
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.h b/gcc/testsuite/g++.dg/other/pr113617.h
+new file mode 100644
+index 000000000..4d30eddbc
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.h
+@@ -0,0 +1,132 @@
++namespace {
++template <int V> struct J { static constexpr int value = V; };
++template <bool V> using K = J<V>;
++using M = K<true>;
++template <int> struct L { template <typename _Tp, typename> using type = _Tp; };
++template <bool _Cond, typename _If, typename _Else> using N = typename L<_Cond>::type<_If, _Else>;
++M k;
++template <typename _Tp> struct O { using type = _Tp; };
++template <typename _Up>
++struct P : N<M::value, O<_Up>, _Up> {};
++template <typename _Tp> struct Q { using type = typename P<_Tp>::type; };
++}
++namespace R {
++struct H;
++enum G {};
++template <typename> class S;
++struct T { using U = bool (*) (H &, const H &, G); U F; };
++template <typename, typename> class B;
++template <typename _R, typename _F, typename... _A>
++struct B<_R(_A...), _F> {
++  static bool F(H &, const H &, G) { return false; }
++  __attribute__((noipa)) static _R bar(const H &) {}
++};
++template <typename _R, typename... _A>
++struct S<_R(_A...)> : T {
++  template <typename _F> using AH = B<_R(), _F>;
++  template <typename _F> S(_F) {
++    using AG = AH<_F>;
++    barr = AG::bar;
++    F = AG::F;
++  }
++  using AF = _R (*)(const H &);
++  AF barr;
++};
++template <typename> class I;
++template <typename _F, typename... _B>
++struct I<_F(_B...)> {};
++template <typename> using W = decltype(k);
++template <int, typename _F, typename... _B> struct V {
++  typedef I<typename Q<_F>::type(typename Q<_B>::type...)> type;
++};
++template <typename _F, typename... _B>
++__attribute__((noipa)) typename V<W<_F>::value, _F, _B...>::type
++baz(_F, _B...) { return typename V<W<_F>::value, _F, _B...>::type (); }
++template <typename _Tp> struct AJ {
++  template <typename _Up> struct _Ptr { using type = _Up *; };
++  using AI = typename _Ptr<_Tp>::type;
++};
++template <typename _Tp> struct Y {
++  using AI = typename AJ<_Tp>::AI;
++  AI operator->();
++};
++}
++extern int z;
++namespace N1 {
++namespace N2 {
++namespace N3 {
++enum Z { Z1, Z2 };
++template <int> struct X {
++  template <typename _F>
++  __attribute__((noipa)) void boo(long long, long long, long long, _F &) {}
++};
++struct AC {
++  AC(int);
++  void m1(R::S<void()>);
++};
++template <typename>
++__attribute__((noipa)) void garply(void *, long long, long long, long long) {}
++template <>
++template <typename _F>
++void X<Z2>::boo(long long, long long x, long long y, _F &fi) {
++  AC pool(z);
++  for (;;) {
++    auto job = R::baz(garply<_F>, &fi, y, y, x);
++    pool.m1(job);
++  }
++}
++struct AB {
++  static AB &bleh();
++  template <typename _F>
++  void boo(long first, long x, long y, _F fi) {
++    switch (ab1) {
++    case Z1:
++      ab2->boo(first, x, y, fi);
++    case Z2:
++      ab3->boo(first, x, y, fi);
++    }
++  }
++  Z ab1;
++  R::Y<X<Z1>> ab2;
++  R::Y<X<Z2>> ab3;
++};
++template <typename, bool> struct C;
++template <typename _F> struct C<_F, false> {
++  __attribute__((noipa)) C(_F) {}
++  void boo(long first, long x, long y) {
++    auto u = AB::bleh();
++    u.boo(first, x, y, *this);
++  }
++};
++template <typename _F> struct AA { typedef C<_F, 0> type; };
++}
++}
++}
++struct AD {
++  template <typename _F>
++  static void boo(long first, long x, long y, _F f) {
View file
_service:tar_scm:0306-RISC-V-Install-libstdc-libcc1-etc-to-lib64-instead-o.patch
Added
@@ -0,0 +1,65 @@
+From 84edbc6544ed872aedb3cb6f6d0feb8647ff1d8b Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Mon, 14 Oct 2024 10:09:46 +0800
+Subject: [PATCH] RISC-V: Install libstdc++/libcc1 etc to /lib64 instead of lib
+
+The problem is that if we are configured with `--disable-multilib`,
+  gcc -print-multi-os-directory
+outputs
+  .
+Thus the dest to install libraries is set to
+  /usr/lib/.
+While on other platforms (x86-64, arm64) it will be
+  /usr/lib/../lib64
+Let's sync riscv64 with them.
+
+Another problem is that
+  gcc -print-file-name=libzstd.so.1
+will output
+  /usr/lib64/lp64d/../lib64/libzstd.so.1
+which also needs to be patched.
+---
+ gcc/config.gcc | 3 +++
+ gcc/config/riscv/linux.h | 2 ++
+ gcc/config/riscv/t-openEuler | 2 ++
+ 3 files changed, 7 insertions(+)
+ create mode 100644 gcc/config/riscv/t-openEuler
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 19b21a280..23c5bee2b 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2453,6 +2453,9 @@ riscv*-*-linux*)
+ 	xyes) tmake_file="${tmake_file} riscv/t-linux-multilib" ;;
+ 	*) echo "Unknown value for enable_multilib"; exit 1
+ 	esac
++	case "x${target_vendor}" in
++	xopenEuler) tmake_file="${tmake_file} riscv/t-openEuler"
++	esac
+ 	tmake_file="${tmake_file} riscv/t-riscv riscv/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
+index b5c6c5027..a8d65f4e0 100644
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3.  If not see
+ #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+ #define STARTFILE_PREFIX_SPEC \
++  "/lib" XLEN_SPEC "/ " \
++  "/usr/lib" XLEN_SPEC "/ " \
+   "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+   "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+   "/lib/ " \
+diff --git a/gcc/config/riscv/t-openEuler b/gcc/config/riscv/t-openEuler
+new file mode 100644
+index 000000000..26541dd08
+--- /dev/null
++++ b/gcc/config/riscv/t-openEuler
+@@ -0,0 +1,2 @@
++MULTILIB_OPTIONS = mabi=lp64d
++MULTILIB_DIRNAMES = ../lib64
+--
+2.39.5 (Apple Git-154)
+
View file
_service:tar_scm:0307-Set-fallback-value-for-print-multi-os-directory.patch
Added
@@ -0,0 +1,105 @@
+From 0d157b14f361f8319f4694c54c6e01ac8f59d278 Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Tue, 8 Oct 2024 17:56:23 +0800
+Subject: [PATCH 1/2] Set fallback value for -print-multi-os-directory
+
+Clang doesn't support the -print-multi-os-directory option,
+so let's set the fallback value (../lib64) if it is empty.
+
+This is only needed for the projects built by hostcc:
+  gcc, libcc1, libiberty
+
+The projects for targets only will always be built by gcc itself.
+---
+ gcc/configure | 3 +++
+ libcc1/configure | 6 ++++++
+ libcc1/configure.ac | 3 +++
+ libiberty/Makefile.in | 5 ++++-
+ libtool.m4 | 3 +++
+ 5 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index 7e64599b0..ef0449edd 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -18598,6 +18598,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+diff --git a/libcc1/configure b/libcc1/configure
+index 01cfb2806..3c437d690 100755
+--- a/libcc1/configure
++++ b/libcc1/configure
+@@ -9701,6 +9701,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+@@ -14865,6 +14868,9 @@ libsuffix=
+ if test "$GXX" = yes; then
+   libsuffix=`$CXX -print-multi-os-directory`
+ fi
++if [ -z "$libsuffix" ];then
++  libsuffix=../lib64
++fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for socket libraries" >&5
+diff --git a/libcc1/configure.ac b/libcc1/configure.ac
+index 36f5a7e09..acd7c4c04 100644
+--- a/libcc1/configure.ac
++++ b/libcc1/configure.ac
+@@ -72,6 +72,9 @@ libsuffix=
+ if test "$GXX" = yes; then
+   libsuffix=`$CXX -print-multi-os-directory`
+ fi
++if [ -z "$libsuffix" ];then
++  libsuffix=../lib64
++fi
+ AC_SUBST(libsuffix)
+
+ dnl Test for -lsocket and -lnsl.  Copied from libgo/configure.ac.
+diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
+index 1b17c2e3a..2bfa00de5 100644
+--- a/libiberty/Makefile.in
++++ b/libiberty/Makefile.in
+@@ -385,7 +385,10 @@ install-strip: install
+ # multilib-specific flags, it's overridden by FLAGS_TO_PASS from the
+ # default multilib, so we have to take CFLAGS into account as well,
+ # since it will be passed the multilib flags.
+-MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory`
++MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory 2>/dev/null`
++ifeq ($(MULTIOSDIR),)
++  MULTIOSDIR = ../lib64
++endif
+ install_to_libdir: all
+ 	if test -n "${target_header_dir}"; then \
+ 		${mkinstalldirs} $(DESTDIR)$(libdir)/$(MULTIOSDIR); \
+diff --git a/libtool.m4 b/libtool.m4
+index 17f8e5f30..86fc1e705 100644
+--- a/libtool.m4
++++ b/libtool.m4
+@@ -2059,6 +2059,9 @@ if test "$GCC" = yes; then
+     # and add multilib dir if necessary.
+     lt_tmp_lt_search_path_spec=
+     lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
++    if [ -z "$lt_multi_os_dir" ];then
++      lt_multi_os_dir=../lib64
++    fi
+     for lt_sys_path in $lt_search_path_spec; do
+       if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+         lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+--
+2.47.0
+
View file
_service:tar_scm:0308-Fix-enum-INPUT-MIDDLE-FINAL-aes_stage.patch
Added
@@ -0,0 +1,108 @@
+From 1624bdceb341e0034c22ce46bc2e422726f76cce Mon Sep 17 00:00:00 2001
+From: YunQiang Su <yunqiang@isrc.iscas.ac.cn>
+Date: Tue, 8 Oct 2024 17:59:56 +0800
+Subject: [PATCH 2/2] Fix enum { INPUT, MIDDLE, FINAL } aes_stage
+
+FINAL is defined in ansidecl.h.
+Let's rename the elements to
+  aesINPUT, aesMIDDLE, aesFINAL
+to avoid conflicts.
+
+I found this problem when trying to build gcc with clang.
+In fact FINAL is defined to empty for clang, and `final` for gcc.
+So it coincidentally worked for gcc.
+---
+ gcc/crypto-accel.cc | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
+index e7766a585..716c4a38b 100644
+--- a/gcc/crypto-accel.cc
++++ b/gcc/crypto-accel.cc
+@@ -1251,7 +1251,7 @@ public:
+
+ /* AES stage description.  Required for some specializations
+    for curtain rounds.  */
+-typedef enum { INPUT, MIDDLE, FINAL } aes_stage;
++typedef enum { aesINPUT, aesMIDDLE, aesFINAL } aes_stage;
+
+ /* AES entity description.  It can be both round or state inside round.
+    It provides interface for unified analysis between blocks of 4 parts:
+@@ -1356,7 +1356,7 @@ struct state_input
+
+ /* Input round state uses special input.  */
+ template<>
+-struct state_input<INPUT>
++struct state_input<aesINPUT>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+
+@@ -1389,7 +1389,7 @@ struct state_output
+
+ /* Final round state generates special output.  */
+ template<>
+-struct state_output<FINAL>
++struct state_output<aesFINAL>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+
+@@ -1409,7 +1409,7 @@ struct round_input
+
+ /* Input round uses special input just as its state.  */
+ template<>
+-struct round_input<INPUT>
++struct round_input<aesINPUT>
+ {
+   typedef std::pair<rtx, unsigned HOST_WIDE_INT> type;
+ };
+@@ -1437,7 +1437,7 @@ struct round_output
+    AES encryption.  */
+ template<>
+ template<>
+-void round_output<INPUT>::reorder<aes_decrypt_table> (type &out)
++void round_output<aesINPUT>::reorder<aes_decrypt_table> (type &out)
+ {
+   gcc_assert (out.size () == 4);
+   std::swap (out[1], out[3]);
+@@ -1445,14 +1445,14 @@ void round_output<INPUT>::reorder<aes_decrypt_table> (type &out)
+
+ template<>
+ template<>
+-void round_output<MIDDLE>::reorder<aes_decrypt_table> (type &out)
++void round_output<aesMIDDLE>::reorder<aes_decrypt_table> (type &out)
+ {
+-  round_output<INPUT>::reorder<aes_decrypt_table> (out);
++  round_output<aesINPUT>::reorder<aes_decrypt_table> (out);
+ }
+
+ /* Final round generates special output.  */
+ template<>
+-struct round_output<FINAL> : state_output<FINAL>
++struct round_output<aesFINAL> : state_output<aesFINAL>
+ {
+   template<typename T>
+   static void finalize (type &out, const T &v)
+@@ -1644,14 +1644,14 @@ public:
+   typedef std::map<rtx_insn *, aes_table_ref<T> > table_ref_map;
+
+   /* AES states typedefs.  */
+-  typedef aes_state<input_info, INPUT, T> aes_input_state;
+-  typedef aes_state<round_input_info<T>, MIDDLE, T> aes_body_state;
+-  typedef aes_state<round_input_info<T>, FINAL, T> aes_final_state;
++  typedef aes_state<input_info, aesINPUT, T> aes_input_state;
++  typedef aes_state<round_input_info<T>, aesMIDDLE, T> aes_body_state;
++  typedef aes_state<round_input_info<T>, aesFINAL, T> aes_final_state;
+
+   /* AES rounds typedefs.  */
+-  typedef aes_round<input_info, INPUT, T> aes_input_round;
+-  typedef aes_round<round_input_info<T>, MIDDLE, T> aes_body_round;
+-  typedef aes_round<round_input_info<T>, FINAL, T> aes_final_round;
++  typedef aes_round<input_info, aesINPUT, T> aes_input_round;
++  typedef aes_round<round_input_info<T>, aesMIDDLE, T> aes_body_round;
++  typedef aes_round<round_input_info<T>, aesFINAL, T> aes_final_round;
+
+   bool run ();
+
+--
+2.47.0
+
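A minimal reproduction of the name clash being fixed (my own sketch; ansidecl.h is the real header, the enum line mirrors the old crypto-accel.cc code):

    /* ansidecl.h defines FINAL as a macro: empty for compilers without
       C++11 `final` support (the clang configuration hit here), `final`
       otherwise.  */
    #define FINAL
    typedef enum { INPUT, MIDDLE, FINAL } aes_stage;
    /* Under clang this preprocesses to
       "typedef enum { INPUT, MIDDLE, } aes_stage;" -- it still parses,
       but the FINAL enumerator silently disappears and every later use
       of FINAL fails.  Under gcc, FINAL expands to `final`, which only
       coincidentally kept working.  Renaming the enumerators to
       aesINPUT/aesMIDDLE/aesFINAL sidesteps the macro entirely.  */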
View file
_service:tar_scm:Fix-indentation-and-numbering-errors.diff
Added
@@ -0,0 +1,205 @@
+diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in
+index 91cd653623b..b686f5eb492 100644
+--- a/libphobos/libdruntime/Makefile.in
++++ b/libphobos/libdruntime/Makefile.in
+@@ -124,13 +124,13 @@ target_triplet = @target@
+ # CPU specific sources
+ @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S
+ @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S
+-@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S
+-@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S
+-@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S
++@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S
++@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S
++@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_19 = config/loongarch/switchcontext.S
+ subdir = libdruntime
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+@@ -475,14 +475,14 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \
+ @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22)
+ @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo
+ @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo
+-@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_32 = config/loongarch/libgdruntime_la-switchcontext.lo
+-am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \
++am__objects_33 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \
+ 	$(am__objects_11) $(am__objects_13) $(am__objects_15) \
+ 	$(am__objects_17) $(am__objects_19) $(am__objects_21) \
+ 	$(am__objects_23) $(am__objects_24) $(am__objects_25) \
+@@ -500,22 +500,22 @@ am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo
+ @DRUNTIME_OS_MINGW_TRUE@	config/mingw/libgdruntime_convenience_la-msvc.lo
+ @DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo
+ @DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo
+-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo
+-@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_MIPS_TRUE@am__objects_40 = config/mips/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_41 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_42 = config/mingw/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_43 = config/x86/libgdruntime_convenience_la-switchcontext.lo
++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_44 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_S390_TRUE@am__objects_45 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo
++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_46 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo
+ am__objects_47 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \
+ 	$(am__objects_11) $(am__objects_13) $(am__objects_15) \
+-	$(am__objects_17) $(am__objects_19) $(am__objects_36) \
+-	$(am__objects_23) $(am__objects_37) $(am__objects_38) \
+-	$(am__objects_39) $(am__objects_40) $(am__objects_41) \
+-	$(am__objects_42) $(am__objects_43) $(am__objects_44) \
+-	$(am__objects_45) $(am__objects_46)
+-am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \
+-	$(am__objects_47) $(am__objects_33)
++	$(am__objects_17) $(am__objects_19) $(am__objects_37) \
++	$(am__objects_23) $(am__objects_38) $(am__objects_39) \
++	$(am__objects_40) $(am__objects_41) $(am__objects_42) \
++	$(am__objects_43) $(am__objects_44) $(am__objects_45) \
++	$(am__objects_46)
++am__objects_48 = $(am__objects_1) $(am__objects_36) $(am__objects_3) \
++	$(am__objects_47) $(am__objects_34)
+ am__objects_49 = $(am__objects_48)
+ am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49)
+ libgdruntime_convenience_la_OBJECTS = \
+@@ -1905,11 +1905,6 @@ config/arm/$(am__dirstamp):
+ 	@: > config/arm/$(am__dirstamp)
+ config/arm/libgdruntime_la-switchcontext.lo: \
+ 	config/arm/$(am__dirstamp)
+-config/loongarch/$(am__dirstamp):
+-	@$(MKDIR_P) config/loongarch
+-	@: > config/loongarch/$(am__dirstamp)
+-config/loongarch/libgdruntime_la-switchcontext.lo: \
+-	config/loongarch/$(am__dirstamp)
+ config/mips/$(am__dirstamp):
+ 	@$(MKDIR_P) config/mips
+ 	@: > config/mips/$(am__dirstamp)
+@@ -1937,6 +1932,11 @@ config/s390/$(am__dirstamp):
+ 	@: > config/s390/$(am__dirstamp)
+ config/s390/libgdruntime_la-get_tls_offset.lo: \
+ 	config/s390/$(am__dirstamp)
++config/loongarch/$(am__dirstamp):
++	@$(MKDIR_P) config/loongarch
++	@: > config/loongarch/$(am__dirstamp)
++config/loongarch/libgdruntime_la-switchcontext.lo: \
++	config/loongarch/$(am__dirstamp)
+ gcc/config.lo: gcc/$(am__dirstamp)
+ gcc/libbacktrace.lo: gcc/$(am__dirstamp)
+
+@@ -1950,8 +1950,6 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/aarch64/$(am__dirstamp)
+ config/arm/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/arm/$(am__dirstamp)
+-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \
+-	config/loongarch/$(am__dirstamp)
+ config/mips/libgdruntime_convenience_la-switchcontext.lo: \
+ 	config/mips/$(am__dirstamp)
+ config/powerpc/libgdruntime_convenience_la-switchcontext.lo: \
+@@ -1964,6 +1962,8 @@ config/systemz/libgdruntime_convenience_la-get_tls_offset.lo: \
+ 	config/systemz/$(am__dirstamp)
+ config/s390/libgdruntime_convenience_la-get_tls_offset.lo: \
+ 	config/s390/$(am__dirstamp)
++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \
++	config/loongarch/$(am__dirstamp)
+
+ libgdruntime_convenience.la: $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_DEPENDENCIES) $(EXTRA_libgdruntime_convenience_la_DEPENDENCIES)
+ 	$(AM_V_GEN)$(libgdruntime_convenience_la_LINK) $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_LIBADD) $(LIBS)
+@@ -1976,14 +1976,14 @@ mostlyclean-compile:
+ 	-rm -f config/arm/*.lo
+ 	-rm -f config/mingw/*.$(OBJEXT)
+ 	-rm -f config/mingw/*.lo
+-	-rm -f config/loongarch/*.$(OBJEXT)
+-	-rm -f config/loongarch/*.lo
+ 	-rm -f config/mips/*.$(OBJEXT)
+ 	-rm -f config/mips/*.lo
+ 	-rm -f config/powerpc/*.$(OBJEXT)
+ 	-rm -f config/powerpc/*.lo
+ 	-rm -f config/s390/*.$(OBJEXT)
+ 	-rm -f config/s390/*.lo
++	-rm -f config/loongarch/*.$(OBJEXT)
++	-rm -f config/loongarch/*.lo
+ 	-rm -f config/systemz/*.$(OBJEXT)
+ 	-rm -f config/systemz/*.lo
+ 	-rm -f config/x86/*.$(OBJEXT)
+@@ -2101,10 +2101,7 @@ config/aarch64/libgdruntime_la-switchcontext.lo: config/aarch64/switchcontext.S
+ config/arm/libgdruntime_la-switchcontext.lo: config/arm/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S
+
+-config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S
+-	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS)
+-
+-onfig/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S
++config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S
+
+ config/powerpc/libgdruntime_la-switchcontext.lo: config/powerpc/switchcontext.S
+@@ -2122,18 +2119,21 @@ config/systemz/libgdruntime_la-get_tls_offset.lo: config/systemz/get_tls_offset.
+ config/s390/libgdruntime_la-get_tls_offset.lo: config/s390/get_tls_offset.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/s390/libgdruntime_la-get_tls_offset.lo `test -f 'config/s390/get_tls_offset.S' || echo '$(srcdir)/'`config/s390/get_tls_offset.S
+
++config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S
++	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S
++
+ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: config/aarch64/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/aarch64/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/aarch64/switchcontext.S' || echo '$(srcdir)/'`config/aarch64/switchcontext.S
+
+ config/arm/libgdruntime_convenience_la-switchcontext.lo: config/arm/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S
+
+-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S
+-	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM
+-
+ config/mips/libgdruntime_convenience_la-switchcontext.lo: config/mips/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S
+
++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S
++	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S
++
+ config/powerpc/libgdruntime_convenience_la-switchcontext.lo: config/powerpc/switchcontext.S
+ 	$(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/powerpc/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/powerpc/switchcontext.S' || echo
'$(srcdir)/'`config/powerpc/switchcontext.S + +@@ -2178,10 +2178,10 @@ clean-libtool: + -rm -rf config/aarch64/.libs config/aarch64/_libs + -rm -rf config/arm/.libs config/arm/_libs + -rm -rf config/mingw/.libs config/mingw/_libs +- -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/mips/.libs config/mips/_libs + -rm -rf config/powerpc/.libs config/powerpc/_libs + -rm -rf config/s390/.libs config/s390/_libs ++ -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/systemz/.libs config/systemz/_libs + -rm -rf config/x86/.libs config/x86/_libs + -rm -rf core/.libs core/_libs +@@ -2340,10 +2340,10 @@ distclean-generic: + -rm -f config/aarch64/$(am__dirstamp) + -rm -f config/arm/$(am__dirstamp) + -rm -f config/mingw/$(am__dirstamp) +- -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/mips/$(am__dirstamp)
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2